X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=sharedchao1.cpp;h=8d47ad2fca1d307bd150a985224b1099e492626a;hp=02e241180d2d4855309a8f8aa01a279963b506e5;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=477e76a8a79b60f6cd4253324dd830bdea25e3e9 diff --git a/sharedchao1.cpp b/sharedchao1.cpp index 02e2411..8d47ad2 100644 --- a/sharedchao1.cpp +++ b/sharedchao1.cpp @@ -10,64 +10,248 @@ #include "sharedchao1.h" /***********************************************************************/ -EstOutput SharedChao1::getValues(SharedRAbundVector* sharedA, SharedRAbundVector* sharedB){ +EstOutput SharedChao1::getValues(vector shared){ try { - data.resize(1,0); + data.resize(1,0); + vector temp; + int numGroups = shared.size(); + float Chao = 0.0; float leftvalue, rightvalue; + + // IntNode is defined in mothur.h + // The tree used here is a binary tree used to represent the f1+++, f+1++, f++1+, f+++1, f11++, f1+1+... + // combinations required to solve the chao estimator equation for any number of groups. Conceptually, think + // of each node as having a 1 and a + value, or for f2 values a 2 and a + value, and 2 pointers to intnodes, and 2 coeffient values. + // The coeffient value is how many times you chose branch 1 to get to that fvalue. + // If you choose left you are selecting the 1 or 2 value and right means the + value. For instance, to find + // the number of bins that have f1+1+ you would start at the root, go left, right, left, and select the rightvalue. + // the coeffient is 2. Note: we only set the coeffient in f2 values. + + //create and initialize trees to 0. + initialTree(numGroups); - int f11, f1A, f2A, f1B, f2B, S12, tempA, tempB; - f11 = 0; f1A = 0; f2A = 0; f1B = 0; f2B = 0; S12 = 0; - float ChaoAB, part1, part2, part3; - - /* f11 = number of shared OTUs with one observed individual in A and B - f1A, f2A = number of shared OTUs with one or two individuals observed in A - f1B, f2B = number of shared OTUs with one or two individuals observed in B - S12 = number of shared OTUs in A and B */ - - //loop through vectors calculating the f11, f1A, f2A, f1B, f2B, S12 values - for (int i = 0; i< sharedA->size(); i++) { - tempA = sharedA->getAbundance(i); //store in temps to avoid calling getAbundance multiple times - tempB = sharedB->getAbundance(i); - if ((tempA != 0) && (tempB != 0)) {//they are shared - S12++; //they are shared - //do both A and B have one - if ((tempA == 1) && (tempB == 1)) { f11++; } + for (int i = 0; i < shared[0]->getNumBins(); i++) { + //get bin values and calc shared + bool sharedByAll = true; + temp.clear(); + for (int j = 0; j < numGroups; j++) { + temp.push_back(shared[j]->getAbundance(i)); + if (temp[j] == 0) { sharedByAll = false; } + } + + //they are shared + if (sharedByAll == true) { + //find f1 and f2values + updateTree(temp); + } + } + + + //calculate chao1, (numleaves-1) because numleaves contains the ++ values. + bool bias = false; + for(int i=0;ilvalue == 0 || f2leaves[i]->rvalue == 0) { bias = true;}// break;} + } + + if(bias){ + for (int i = 0; i < numLeaves; i++) { - //does A have one or two - if (tempA == 1) { f1A++; } - else if (tempA == 2) { f2A++; } + leftvalue = (float)(f1leaves[i]->lvalue * (f1leaves[i]->lvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * (f2leaves[i]->lvalue + 1)); + if (i != (numLeaves-1)) { + rightvalue = (float)(f1leaves[i]->rvalue * (f1leaves[i]->rvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * (f2leaves[i]->rvalue + 1)); + }else{ + //add in sobs + rightvalue = (float)(f1leaves[i]->rvalue); + } + Chao += leftvalue + rightvalue; + } + } + else{ + + for (int i = 0; i < numLeaves; i++) { - //does B have one or two - if (tempB == 1) { f1B++; } - else if (tempB == 2) { f2B++; } - + leftvalue = (float)(f1leaves[i]->lvalue * f1leaves[i]->lvalue) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * f2leaves[i]->lvalue); + if (i != (numLeaves-1)) { + rightvalue = (float)(f1leaves[i]->rvalue * f1leaves[i]->rvalue) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * f2leaves[i]->rvalue); + }else{ + //add in sobs + rightvalue = (float)(f1leaves[i]->rvalue); + } + Chao += leftvalue + rightvalue; } } - //checks for divide by zero error - if ((f2A == 0) || (f2B == 0)) { - part1 = ((float)(f1A*f1B)/(float)(4*(f2A+1)*(f2B+1))); - part2 = ((float)(f1A*(f1A-1))/(float)(2*f2A+2)); - part3 = ((float)(f1B*(f1B-1))/(float)(2*f2B+2)); - }else { - part1 = ((float)(f1A*f1B)/(float)(4*f2A*f2B)); - part2 = ((float)(f1A*f1A)/(float)(2*f2A)); - part3 = ((float)(f1B*f1B)/(float)(2*f2B)); + for (int i = 0; i < numNodes; i++) { + delete f1leaves[i]; + delete f2leaves[i]; } - ChaoAB = (float)S12 + (float)(f11*part1) + part2 + part3; - data[0] = ChaoAB; + data[0] = Chao; return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "SharedChao1", "getValues"); + exit(1); + } +} + +/***********************************************************************/ +//builds trees structure with n leaf nodes initialized to 0. +void SharedChao1::initialTree(int n) { + try { + // (2^n) / 2. Divide by 2 because each leaf node contains 2 values. One for + and one for 1 or 2. + numLeaves = pow(2, (float)n) / 2; + numNodes = 2*numLeaves - 1; + int countleft = 0; + int countright = 1; + + f1leaves.resize(numNodes); + f2leaves.resize(numNodes); + + //initialize leaf values + for (int i = 0; i < numLeaves; i++) { + f1leaves[i] = new IntNode(0, 0, NULL, NULL); + f2leaves[i] = new IntNode(0, 0, NULL, NULL); + } + + //set pointers to children + for (int j = numLeaves; j < numNodes; j++) { + f1leaves[j] = new IntNode(); + f1leaves[j]->left = f1leaves[countleft]; + f1leaves[j]->right = f1leaves[countright]; + + f2leaves[j] = new IntNode(); + f2leaves[j]->left = f2leaves[countleft]; + f2leaves[j]->right =f2leaves[countright]; + + countleft = countleft + 2; + countright = countright + 2; + } + + //point to root + f1root = f1leaves[numNodes-1]; + + //point to root + f2root = f2leaves[numNodes-1]; + + //set coeffients + setCoef(f2root, 0); + } + catch(exception& e) { + if ((toString(e.what()) == "vector::_M_fill_insert") || (toString(e.what()) == "St9bad_alloc")) { m->mothurOut("You are using " + toString(n) + " groups which creates 2^" + toString(n+1) + " nodes. Try reducing the number of groups you selected. "); m->mothurOutEndLine(); exit(1); } + m->errorOut(e, "SharedChao1", "initialTree"); + exit(1); + } +} + +/***********************************************************************/ +//take vector containing the abundance info. for a bin and updates trees. +void SharedChao1::updateTree(vector bin) { + try { + updateBranchf1(f1root, bin, 0); + updateBranchf2(f2root, bin, 0); + } + catch(exception& e) { + m->errorOut(e, "SharedChao1", "updateTree"); + exit(1); + } +} + +/***********************************************************************/ +void SharedChao1::updateBranchf1(IntNode* node, vector bin, int index) { + try { + //if you have more than one group + if (index == (bin.size()-1)) { + if (bin[index] == 1) { node->lvalue++; node->rvalue++; } + else { node->rvalue++; } + }else { + if (bin[index] == 1) { + //follow path as if you are 1 + updateBranchf1(node->left, bin, index+1); + } + //follow path as if you are + + updateBranchf1(node->right, bin, index+1); + } + } + catch(exception& e) { + m->errorOut(e, "SharedChao1", "updateBranchf1"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the SharedChao1 class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} + +/***********************************************************************/ +void SharedChao1::updateBranchf2(IntNode* node, vector bin, int index) { + try { + //if you have more than one group + if (index == (bin.size()-1)) { + if (bin[index] == 2) { node->lvalue++; node->rvalue++; } + else { node->rvalue++; } + }else { + if (bin[index] == 2) { + //follow path as if you are 1 + updateBranchf2(node->left, bin, index+1); + } + //follow path as if you are + + updateBranchf2(node->right, bin, index+1); + } + } + catch(exception& e) { + m->errorOut(e, "SharedChao1", "updateBranchf2"); exit(1); - } + } +} +/***********************************************************************/ +void SharedChao1::setCoef(IntNode* node, int coef) { + try { + if (node->left != NULL) { + setCoef(node->left, coef+1); + setCoef(node->right, coef); + }else { + node->lcoef = coef+1; + node->rcoef = coef; + } + } + catch(exception& e) { + m->errorOut(e, "SharedChao1", "setCoef"); + exit(1); + } } /***********************************************************************/ +//for debugging purposes +void SharedChao1::printTree() { + + m->mothurOut("F1 leaves"); m->mothurOutEndLine(); + printBranch(f1root); + + m->mothurOut("F2 leaves"); m->mothurOutEndLine(); + printBranch(f2root); + + +} +/*****************************************************************/ +void SharedChao1::printBranch(IntNode* node) { + try { + + // you are not a leaf + if (node->left != NULL) { + printBranch(node->left); + printBranch(node->right); + }else { //you are a leaf + m->mothurOut(toString(node->lvalue)); m->mothurOutEndLine(); + m->mothurOut(toString(node->rvalue)); m->mothurOutEndLine(); + } + + } + catch(exception& e) { + m->errorOut(e, "SharedChao1", "printBranch"); + exit(1); + } +} + +/*****************************************************************/ + + + +