]> git.donarmstrong.com Git - mothur.git/blobdiff - sharedchao1.cpp
changed random forest output filename
[mothur.git] / sharedchao1.cpp
index 0536528e38a55402fbafcf5b1d13ad1ecdfa47ec..8d47ad2fca1d307bd150a985224b1099e492626a 100644 (file)
 #include "sharedchao1.h"
 
 /***********************************************************************/
-EstOutput SharedChao1::getValues(SharedRAbundVector* sharedA, SharedRAbundVector* sharedB){
+EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
        try {
-               data.resize(1,0);
+               data.resize(1,0);               
+               vector<int> temp; 
+               int numGroups = shared.size();
+               float Chao = 0.0; float leftvalue, rightvalue;
+                               
+               // IntNode is defined in mothur.h
+               // The tree used here is a binary tree used to represent the f1+++, f+1++, f++1+, f+++1, f11++, f1+1+... 
+               // combinations required to solve the chao estimator equation for any number of groups.  Conceptually, think
+               // of each node as having a 1 and a + value, or for f2 values a 2 and a + value, and 2 pointers to intnodes, and 2 coeffient values.
+               // The coeffient value is how many times you chose branch 1 to get to that fvalue.
+               // If you choose left you are selecting the 1 or 2 value and right means the + value.  For instance, to find
+               // the number of bins that have f1+1+ you would start at the root, go left, right, left, and select the rightvalue.
+               // the coeffient is 2.  Note: we only set the coeffient in f2 values.
+               
+               //create and initialize trees to 0.
+               initialTree(numGroups);
                
-               int f11, f1A, f2A, f1B, f2B, S12, tempA, tempB;
-               f11 = 0; f1A = 0; f2A = 0; f1B = 0; f2B = 0; S12 = 0;
-               float ChaoAB, part1, part2, part3;
-
-       /*      f11 = number of shared OTUs with one observed individual in A and B 
-               f1A, f2A = number of shared OTUs with one or two individuals observed in A 
-               f1B, f2B = number of shared OTUs with one or two individuals observed in B 
-               S12 = number of shared OTUs in A and B  */
-
-               //loop through vectors calculating the f11, f1A, f2A, f1B, f2B, S12 values
-               for (int i = 0; i< sharedA->size(); i++) {
-                       tempA = sharedA->getAbundance(i); //store in temps to avoid calling getAbundance multiple times
-                       tempB = sharedB->getAbundance(i);
-                       if ((tempA != 0) && (tempB != 0)) {//they are shared
-                               S12++; //they are shared
-                               //do both A and B have one
-                               if ((tempA == 1) && (tempB == 1)) { f11++; }
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
+                       //get bin values and calc shared 
+                       bool sharedByAll = true;
+                       temp.clear();
+                       for (int j = 0; j < numGroups; j++) {
+                               temp.push_back(shared[j]->getAbundance(i));
+                               if (temp[j] == 0) { sharedByAll = false; }
+                       }
+                       
+                       //they are shared
+                       if (sharedByAll == true) { 
+                               //find f1 and f2values
+                               updateTree(temp);
+                       }
+               }
+
+               
+               //calculate chao1, (numleaves-1) because numleaves contains the ++ values.
+               bool bias = false;
+               for(int i=0;i<numLeaves;i++){
+                       if (f2leaves[i]->lvalue == 0 || f2leaves[i]->rvalue == 0) { bias = true;}// break;}
+               }
+
+               if(bias){
+                       for (int i = 0; i < numLeaves; i++) {
                                
-                               //does A have one or two
-                               if (tempA == 1)                 { f1A++; }
-                               else if (tempA == 2)    { f2A++; }
+                               leftvalue = (float)(f1leaves[i]->lvalue * (f1leaves[i]->lvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * (f2leaves[i]->lvalue + 1));
+                               if (i != (numLeaves-1)) {
+                                       rightvalue = (float)(f1leaves[i]->rvalue * (f1leaves[i]->rvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * (f2leaves[i]->rvalue + 1));
+                               }else{
+                                       //add in sobs
+                                       rightvalue = (float)(f1leaves[i]->rvalue);
+                               }
+                               Chao += leftvalue + rightvalue;
+                       }
+               }
+               else{
+                       
+                       for (int i = 0; i < numLeaves; i++) {
                                
-                               //does B have one or two
-                               if (tempB == 1)                 { f1B++; }
-                               else if (tempB == 2)    { f2B++; }
+                               leftvalue = (float)(f1leaves[i]->lvalue * f1leaves[i]->lvalue) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * f2leaves[i]->lvalue);
+                               if (i != (numLeaves-1)) {
+                                       rightvalue = (float)(f1leaves[i]->rvalue * f1leaves[i]->rvalue) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * f2leaves[i]->rvalue);
+                               }else{
+                                       //add in sobs
+                                       rightvalue = (float)(f1leaves[i]->rvalue);
+                               }
+                               Chao += leftvalue + rightvalue;
                        }
                }
                
-               //checks for divide by zero error
-               if ((f2A == 0) || (f2B == 0)) {
-                       part1 = ((float)(f1A*f1B)/(float)(4*(f2A+1)*(f2B+1)));
-                       part2 = ((float)(f1A*(f1A-1))/(float)(2*f2A+2));
-                       part3 = ((float)(f1B*(f1B-1))/(float)(2*f2B+2));
-               }else {
-                       part1 = ((float)(f1A*f1B)/(float)(4*f2A*f2B));
-                       part2 = ((float)(f1A*f1A)/(float)(2*f2A));
-                       part3 = ((float)(f1B*f1B)/(float)(2*f2B));
+               for (int i = 0; i < numNodes; i++) {
+                       delete f1leaves[i];
+                       delete f2leaves[i];
                }
                
-               ChaoAB = (float)S12 + (float)(f11*part1) + part2 + part3;
-               data[0] = ChaoAB;
                
+               data[0] = Chao;
                return data;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "SharedChao1", "getValues");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the SharedChao1 class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+
+/***********************************************************************/
+//builds trees structure with n leaf nodes initialized to 0.
+void SharedChao1::initialTree(int n) {  
+       try {
+               // (2^n) / 2. Divide by 2 because each leaf node contains 2 values. One for + and one for 1 or 2.
+               numLeaves = pow(2, (float)n) / 2;
+               numNodes = 2*numLeaves - 1;
+               int countleft = 0;
+               int countright = 1;
+               
+               f1leaves.resize(numNodes);
+               f2leaves.resize(numNodes);
+       
+               //initialize leaf values
+               for (int i = 0; i < numLeaves; i++) {
+                       f1leaves[i] = new IntNode(0, 0, NULL, NULL);
+                       f2leaves[i] = new IntNode(0, 0, NULL, NULL);
+               }
+               
+               //set pointers to children
+               for (int j = numLeaves; j < numNodes; j++) {
+                       f1leaves[j] = new IntNode();
+                       f1leaves[j]->left = f1leaves[countleft];
+                       f1leaves[j]->right = f1leaves[countright];
+                                               
+                       f2leaves[j] = new IntNode();
+                       f2leaves[j]->left = f2leaves[countleft];
+                       f2leaves[j]->right =f2leaves[countright];
+                       
+                       countleft = countleft + 2;
+                       countright = countright + 2;
+               }
+               
+               //point to root
+               f1root = f1leaves[numNodes-1];
+       
+               //point to root
+               f2root = f2leaves[numNodes-1];
+               
+               //set coeffients
+               setCoef(f2root, 0);
+       }
+       catch(exception& e) {
+               if ((toString(e.what()) == "vector::_M_fill_insert") || (toString(e.what()) == "St9bad_alloc")) { m->mothurOut("You are using " + toString(n) + " groups which creates 2^" + toString(n+1) + " nodes. Try reducing the number of groups you selected. "); m->mothurOutEndLine(); exit(1); }
+               m->errorOut(e, "SharedChao1", "initialTree");
                exit(1);
-       }       
+       }
+}
 
+/***********************************************************************/
+//take vector containing the abundance info. for a bin and updates trees.
+void SharedChao1::updateTree(vector<int> bin) { 
+       try {
+               updateBranchf1(f1root, bin, 0);  
+               updateBranchf2(f2root, bin, 0); 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedChao1", "updateTree");
+               exit(1);
+       }
 }
-/***********************************************************************/
\ No newline at end of file
+
+/***********************************************************************/
+void SharedChao1::updateBranchf1(IntNode* node, vector<int> bin, int index) {
+       try {
+               //if you have more than one group
+               if (index == (bin.size()-1)) {
+                       if (bin[index] == 1) { node->lvalue++; node->rvalue++; }
+                       else { node->rvalue++;  }
+               }else {
+                       if (bin[index] == 1) {
+                               //follow path as if you are 1
+                               updateBranchf1(node->left, bin, index+1);
+                       }
+                       //follow path as if you are +
+                       updateBranchf1(node->right, bin, index+1);
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedChao1", "updateBranchf1");                
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+void SharedChao1::updateBranchf2(IntNode* node, vector<int> bin, int index) {
+       try {
+               //if you have more than one group
+               if (index == (bin.size()-1)) {
+                       if (bin[index] == 2) { node->lvalue++; node->rvalue++; }
+                       else { node->rvalue++;  }
+               }else {
+                       if (bin[index] == 2) {
+                               //follow path as if you are 1
+                               updateBranchf2(node->left, bin, index+1);
+                       }
+                       //follow path as if you are +
+                       updateBranchf2(node->right, bin, index+1);
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedChao1", "updateBranchf2");        
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+void SharedChao1::setCoef(IntNode* node, int coef) {
+       try {
+               if (node->left != NULL) {
+                       setCoef(node->left, coef+1);
+                       setCoef(node->right, coef);
+               }else {
+                       node->lcoef = coef+1;
+                       node->rcoef = coef;
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedChao1", "setCoef");       
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+//for debugging purposes
+void SharedChao1::printTree() {
+       
+       m->mothurOut("F1 leaves"); m->mothurOutEndLine();
+       printBranch(f1root);
+       
+       m->mothurOut("F2 leaves"); m->mothurOutEndLine();
+       printBranch(f2root);
+
+
+}
+/*****************************************************************/
+void SharedChao1::printBranch(IntNode* node) {
+       try {
+               
+               // you are not a leaf
+               if (node->left != NULL) {
+                       printBranch(node->left);
+                       printBranch(node->right);
+               }else { //you are a leaf
+                       m->mothurOut(toString(node->lvalue)); m->mothurOutEndLine();
+                       m->mothurOut(toString(node->rvalue)); m->mothurOutEndLine();
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedChao1", "printBranch");   
+               exit(1);
+       }
+}
+
+/*****************************************************************/
+
+
+
+