+/**************************************************************************************************/
+void Ccode::findVarianceRef (int query) {
+ try {
+
+ varRef[query].resize(windows[query].size(), 0);
+ sdRef[query].resize(windows[query].size(), 0);
+
+ //for each window
+ for (int i = 0; i < windows[query].size(); i++) {
+ varRef[query][i] = (sumSquaredRef[query][i] - ((sumRef[query][i]*sumRef[query][i])/(float)refCombo[query])) / (float)(refCombo[query]-1);
+ sdRef[query][i] = sqrt(varRef[query][i]);
+
+ //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
+ if (averageRef[query][i] < 0.001) { averageRef[query][i] = 0.001; }
+ if (sumRef[query][i] < 0.001) { sumRef[query][i] = 0.001; }
+ if (varRef[query][i] < 0.001) { varRef[query][i] = 0.001; }
+ if (sumSquaredRef[query][i] < 0.001) { sumSquaredRef[query][i] = 0.001; }
+ if (sdRef[query][i] < 0.001) { sdRef[query][i] = 0.001; }
+
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "Ccode", "findVarianceRef");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void Ccode::findVarianceQuery (int query) {
+ try {
+ varQuery[query].resize(windows[query].size(), 0);
+ sdQuery[query].resize(windows[query].size(), 0);
+
+ //for each window
+ for (int i = 0; i < windows[query].size(); i++) {
+ varQuery[query][i] = (sumSquaredQuery[query][i] - ((sumQuery[query][i]*sumQuery[query][i])/(float) closest[query].size())) / (float) (closest[query].size()-1);
+ sdQuery[query][i] = sqrt(varQuery[query][i]);
+
+ //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
+ if (averageQuery[query][i] < 0.001) { averageQuery[query][i] = 0.001; }
+ if (sumQuery[query][i] < 0.001) { sumQuery[query][i] = 0.001; }
+ if (varQuery[query][i] < 0.001) { varQuery[query][i] = 0.001; }
+ if (sumSquaredQuery[query][i] < 0.001) { sumSquaredQuery[query][i] = 0.001; }
+ if (sdQuery[query][i] < 0.001) { sdQuery[query][i] = 0.001; }
+ }
+
+ }
+ catch(exception& e) {
+ errorOut(e, "Ccode", "findVarianceQuery");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void Ccode::determineChimeras (int query) {
+ try {
+
+ isChimericConfidence[query].resize(windows[query].size(), false);
+ isChimericTStudent[query].resize(windows[query].size(), false);
+ isChimericANOVA[query].resize(windows[query].size(), false);
+ anova[query].resize(windows[query].size());
+
+
+ //for each window
+ for (int i = 0; i < windows[query].size(); i++) {
+
+ //get confidence limits
+ float t = getT(closest[query].size()-1); //how many seqs you are comparing to this querySeq
+ float dsUpper = (averageQuery[query][i] + (t * sdQuery[query][i])) / averageRef[query][i];
+ float dsLower = (averageQuery[query][i] - (t * sdQuery[query][i])) / averageRef[query][i];
+//cout << t << '\t' << "ds upper = " << dsUpper << " dsLower = " << dsLower << endl;
+ if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[query][i] > averageRef[query][i])) { /* range does not include 1 */
+ isChimericConfidence[query][i] = true; /* significantly higher at P<0.05 */
+//cout << i << " made it here" << endl;
+ }
+
+ //student t test
+ int degreeOfFreedom = refCombo[query] + closest[query].size() - 2;
+ float denomForT = (((refCombo[query]-1) * varQuery[query][i] + (closest[query].size() - 1) * varRef[query][i]) / (float) degreeOfFreedom) * ((refCombo[query] + closest[query].size()) / (float) (refCombo[query] * closest[query].size())); /* denominator, without sqrt(), for ts calculations */
+
+ float ts = fabs((averageQuery[query][i] - averageRef[query][i]) / (sqrt(denomForT))); /* value of ts for t-student test */
+ t = getT(degreeOfFreedom);
+//cout << i << '\t' << t << '\t' << ts << endl;
+ if ((ts >= t) && (averageQuery[query][i] > averageRef[query][i])) {
+ isChimericTStudent[query][i] = true; /* significantly higher at P<0.05 */
+ }
+
+ //anova test
+ float value1 = sumQuery[query][i] + sumRef[query][i];
+ float value2 = sumSquaredQuery[query][i] + sumSquaredRef[query][i];
+ float value3 = ((sumQuery[query][i]*sumQuery[query][i]) / (float) (closest[query].size())) + ((sumRef[query][i] * sumRef[query][i]) / (float) refCombo[query]);
+ float value4 = (value1 * value1) / ( (float) (closest[query].size() + refCombo[query]) );
+ float value5 = value2 - value4;
+ float value6 = value3 - value4;
+ float value7 = value5 - value6;
+ float value8 = value7 / ((float) degreeOfFreedom);
+ float anovaValue = value6 / value8;
+
+ float f = getF(degreeOfFreedom);
+
+ if ((anovaValue >= f) && (averageQuery[query][i] > averageRef[query][i])) {
+ isChimericANOVA[query][i] = true; /* significant P<0.05 */
+ }
+
+ anova[query][i] = anovaValue;
+ }
+
+ }
+ catch(exception& e) {
+ errorOut(e, "Ccode", "determineChimeras");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+float Ccode::getT(int numseq) {
+ try {
+
+ float tvalue = 0;
+
+ /* t-student critical values for different degrees of freedom and alpha 0.1 in one-tail tests (equivalent to 0.05) */
+ if (numseq > 120) tvalue = 1.645;
+ else if (numseq > 60) tvalue = 1.658;
+ else if (numseq > 40) tvalue = 1.671;
+ else if (numseq > 30) tvalue = 1.684;
+ else if (numseq > 29) tvalue = 1.697;
+ else if (numseq > 28) tvalue = 1.699;
+ else if (numseq > 27) tvalue = 1.701;
+ else if (numseq > 26) tvalue = 1.703;
+ else if (numseq > 25) tvalue = 1.706;
+ else if (numseq > 24) tvalue = 1.708;
+ else if (numseq > 23) tvalue = 1.711;
+ else if (numseq > 22) tvalue = 1.714;
+ else if (numseq > 21) tvalue = 1.717;
+ else if (numseq > 20) tvalue = 1.721;
+ else if (numseq > 19) tvalue = 1.725;
+ else if (numseq > 18) tvalue = 1.729;
+ else if (numseq > 17) tvalue = 1.734;
+ else if (numseq > 16) tvalue = 1.740;
+ else if (numseq > 15) tvalue = 1.746;
+ else if (numseq > 14) tvalue = 1.753;
+ else if (numseq > 13) tvalue = 1.761;
+ else if (numseq > 12) tvalue = 1.771;
+ else if (numseq > 11) tvalue = 1.782;
+ else if (numseq > 10) tvalue = 1.796;
+ else if (numseq > 9) tvalue = 1.812;
+ else if (numseq > 8) tvalue = 1.833;
+ else if (numseq > 7) tvalue = 1.860;
+ else if (numseq > 6) tvalue = 1.895;
+ else if (numseq > 5) tvalue = 1.943;
+ else if (numseq > 4) tvalue = 2.015;
+ else if (numseq > 3) tvalue = 2.132;
+ else if (numseq > 2) tvalue = 2.353;
+ else if (numseq > 1) tvalue = 2.920;
+ else if (numseq <= 1) {
+ mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
+ }
+
+ return tvalue;
+ }
+ catch(exception& e) {
+ errorOut(e, "Ccode", "getT");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+float Ccode::getF(int numseq) {
+ try {
+
+ float fvalue = 0;
+
+ /* F-Snedecor critical values for v1=1 and different degrees of freedom v2 and alpha 0.05 */
+ if (numseq > 120) fvalue = 3.84;
+ else if (numseq > 60) fvalue = 3.92;
+ else if (numseq > 40) fvalue = 4.00;
+ else if (numseq > 30) fvalue = 4.08;
+ else if (numseq > 29) fvalue = 4.17;
+ else if (numseq > 28) fvalue = 4.18;
+ else if (numseq > 27) fvalue = 4.20;
+ else if (numseq > 26) fvalue = 4.21;
+ else if (numseq > 25) fvalue = 4.23;
+ else if (numseq > 24) fvalue = 4.24;
+ else if (numseq > 23) fvalue = 4.26;
+ else if (numseq > 22) fvalue = 4.28;
+ else if (numseq > 21) fvalue = 4.30;
+ else if (numseq > 20) fvalue = 4.32;
+ else if (numseq > 19) fvalue = 4.35;
+ else if (numseq > 18) fvalue = 4.38;
+ else if (numseq > 17) fvalue = 4.41;
+ else if (numseq > 16) fvalue = 4.45;
+ else if (numseq > 15) fvalue = 4.49;
+ else if (numseq > 14) fvalue = 4.54;
+ else if (numseq > 13) fvalue = 4.60;
+ else if (numseq > 12) fvalue = 4.67;
+ else if (numseq > 11) fvalue = 4.75;
+ else if (numseq > 10) fvalue = 4.84;
+ else if (numseq > 9) fvalue = 4.96;
+ else if (numseq > 8) fvalue = 5.12;
+ else if (numseq > 7) fvalue = 5.32;
+ else if (numseq > 6) fvalue = 5.59;
+ else if (numseq > 5) fvalue = 5.99;
+ else if (numseq > 4) fvalue = 6.61;
+ else if (numseq > 3) fvalue = 7.71;
+ else if (numseq > 2) fvalue = 10.1;
+ else if (numseq > 1) fvalue = 18.5;
+ else if (numseq > 0) fvalue = 161;
+ else if (numseq <= 0) {
+ mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
+ }
+
+ return fvalue;
+ }
+ catch(exception& e) {
+ errorOut(e, "Ccode", "getF");
+ exit(1);
+ }
+}
+