X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=mothurmetastats.cpp;h=ae822254c01ccdf7359ca319c44a2db5005f1fc4;hb=8dd3c225255d7084e3aff8740aa4f1f1cabb367a;hp=ed0aaef8b4a8025baa4619d7199a1b2033131152;hpb=19fcbbdba99658f5eca244803280f9ee7f9f6607;p=mothur.git diff --git a/mothurmetastats.cpp b/mothurmetastats.cpp index ed0aaef..ae82225 100644 --- a/mothurmetastats.cpp +++ b/mothurmetastats.cpp @@ -9,6 +9,7 @@ #include "mothurmetastats.h" #include "mothurfisher.h" +#include "spline.h" /***********************************************************/ MothurMetastats::MothurMetastats(double t, int n) { @@ -23,15 +24,7 @@ MothurMetastats::MothurMetastats(double t, int n) { } } /***********************************************************/ -MothurMetastats::~MothurMetastats() { - try { - - - }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "~MothurMetastats"); - exit(1); - } -} +MothurMetastats::~MothurMetastats() {} /***********************************************************/ //main metastats function int MothurMetastats::runMetastats(string outputFileName, vector< vector >& data, int secondGroupingStart) { @@ -63,10 +56,10 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector } //total for first grouping - for (int i = 0; i < (secondGroupingStart-1); i++) { total1 += total[i]; } + for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; } //total for second grouping - for (int i = (secondGroupingStart-1); i < column; i++) { total2 += total[i]; } + for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; } //Creates the ratios by first finding the minimum of totals double min = total[0]; @@ -113,15 +106,15 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector if (m->control_pressed) { return 1; } // Start the calculations. - if ( (column == 2) || ((secondGroupingStart-1) < 8) || ((column-secondGroupingStart+1) < 8) ){ + if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){ vector fish; fish.resize(row, 0.0); vector fish2; fish2.resize(row, 0.0); for(int i = 0; i < row; i++){ - for(int j = 0; j < (secondGroupingStart-1); j++) { fish[i] += data[i][j]; } - for(int j = (secondGroupingStart-1); j < column; j++) { fish2[i] += data[i][j]; } + for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; } + for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; } //vector tempData; tempData.resize(4, 0.0); double f11, f12, f21, f22; @@ -156,12 +149,12 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector for(int i = 0; i < row; i++){ - for(int j = 0; j < (secondGroupingStart-1); j++){ sparse[i] += data[i][j]; } - if(sparse[i] < (double)(secondGroupingStart-1)){ c++; } + for(int j = 0; j < secondGroupingStart; j++) { sparse[i] += data[i][j]; } + if(sparse[i] < (double)secondGroupingStart) { c++; } // ?<= for col - for(int j = (secondGroupingStart-1); j < column; j++){ sparse2[i] += data[i][j]; } - if( (sparse2[i] < (double)(column-secondGroupingStart+1))) { c++; } + for(int j = secondGroupingStart; j < column; j++) { sparse2[i] += data[i][j]; } + if( (sparse2[i] < (double)(column-secondGroupingStart))) { c++; } if (c == 2) { c=0; @@ -198,11 +191,11 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector for (int j = 0; j < row; j++){ if (m->control_pressed) { return 1; } - for (int i = 1; i <= (secondGroupingStart-1); i++){ temp[j][0] += data[j][i-1]; } - temp[j][0] /= (double)(secondGroupingStart-1); + for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; } + temp[j][0] /= (double)secondGroupingStart; - for(int i = secondGroupingStart; i <= column; i++){ temp[j][1] += data[j][i-1]; } - temp[j][1] /= (double)(column-secondGroupingStart+1); + for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; } + temp[j][1] /= (double)(column-secondGroupingStart); } for(int i = 0; i < row; i++){ @@ -213,15 +206,17 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector storage[i][8]=pvalues[i]; } + vector qvalues = calc_qvalues(pvalues); + // BACKUP checks cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); for (int i = 0; i < row; i++){ if (m->control_pressed) { return 1; } - if(pvalues[i] < threshold){ - m->mothurOut("Feature " + toString((i+1)) + " is significant, p = "); - cout << pvalues[i]; + if(qvalues[i] < threshold){ + m->mothurOut("Feature " + toString((i+1)) + " is significant, q = "); + cout << qvalues[i]; m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine(); } } @@ -241,19 +236,21 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293 - out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n"; + out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\tq-value\n"; for(int i = 0; i < row; i++){ if (m->control_pressed) { out.close(); return 0; } out << (i+1); for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; } + out << '\t' << qvalues[i]; out << endl; } out << endl << endl; out.close(); + return 0; }catch(exception& e) { @@ -288,14 +285,14 @@ int MothurMetastats::start(vector& Imatrix, int secondGroupingStart, vec storage[i][0]=C1[i][0]; C1[i][1]=tool[i+row+row]; // var group 1 storage[i][1]=C1[i][1]; - C1[i][2]=C1[i][1]/(secondGroupingStart-1); + C1[i][2]=C1[i][1]/(secondGroupingStart); storage[i][2]=sqrt(C1[i][2]); C2[i][0]=tool[i+row]; // mean group 2 storage[i][4]=C2[i][0]; C2[i][1]=tool[i+row+row+row]; // var group 2 storage[i][5]=C2[i][1]; - C2[i][2]=C2[i][1]/(column-secondGroupingStart+1); + C2[i][2]=C2[i][1]/(column-secondGroupingStart); storage[i][6]=sqrt(C2[i][2]); } @@ -322,7 +319,7 @@ int MothurMetastats::meanvar(vector& pmatrix, int secondGroupingStart, v vector var; var.resize(row, 0.0); vector var2; var2.resize(row, 0.0); - double a = secondGroupingStart-1; + double a = secondGroupingStart; double b = column - a; int m = a * row; int n = row * column; @@ -452,8 +449,8 @@ int MothurMetastats::calc_twosample_ts(vector& Pmatrix, int secondGroupi for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); } vector< vector > C2; C2.resize(row); for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); } - vector storage; storage.resize(row, 0.0); - vector tool; tool.resize(row, 0.0); + vector storage; storage.resize(a, 0.0); + vector tool; tool.resize(a, 0.0); double xbardiff = 0.0; double denom = 0.0; meanvar(Pmatrix, secondGroupingStart, storage); @@ -467,11 +464,11 @@ int MothurMetastats::calc_twosample_ts(vector& Pmatrix, int secondGroupi if (m->control_pressed) { return 0; } C1[i][0]=tool[i]; C1[i][1]=tool[i+row+row]; - C1[i][2]=C1[i][1]/(secondGroupingStart-1); + C1[i][2]=C1[i][1]/(secondGroupingStart); C2[i][0]=tool[i+row]; C2[i][1]=tool[i+row+row+row]; // var group 2 - C2[i][2]=C2[i][1]/(column-secondGroupingStart+1); + C2[i][2]=C2[i][1]/(column-secondGroupingStart); } for (int i = 0; i < row; i++){ @@ -492,5 +489,245 @@ int MothurMetastats::calc_twosample_ts(vector& Pmatrix, int secondGroupi } } /***********************************************************/ +vector MothurMetastats::calc_qvalues(vector& pValues) { + try { + + int numRows = pValues.size(); + vector qvalues(numRows, 0.0); + + //fill lambdas with 0.00, 0.01, 0.02... 0.95 + vector lambdas(96, 0); + for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; } + + vector pi0_hat(lambdas.size(), 0); + + //calculate pi0_hat + for (int l = 0; l < lambdas.size(); l++){ // for each lambda value + int count = 0; + for (int i = 0; i < numRows; i++){ // for each p-value in order + if (pValues[i] > lambdas[l]){ count++; } + } + pi0_hat[l] = count/(double)(numRows*(1-lambdas[l])); + } + + double pi0 = smoothSpline(lambdas, pi0_hat, 3); + + //order p-values + vector ordered_qs = qvalues; + vector ordered_ps(pValues.size(), 0); + for (int i = 1; i < ordered_ps.size(); i++) { ordered_ps[i] = ordered_ps[i-1] + 1; } + vector tempPvalues = pValues; + OrderPValues(0, numRows-1, tempPvalues, ordered_ps); + + ordered_qs[numRows-1] = min((pValues[ordered_ps[numRows-1]]*pi0), 1.0); + for (int i = (numRows-2); i >= 0; i--){ + double p = pValues[ordered_ps[i]]; + double temp = p*numRows*pi0/(double)(i+1); + + ordered_qs[i] = min(temp, ordered_qs[i+1]); + } + + //re-distribute calculated qvalues to appropriate rows + for (int i = 0; i < numRows; i++){ + qvalues[ordered_ps[i]] = ordered_qs[i]; + } + + return qvalues; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "calc_qvalues"); + exit(1); + } +} +/***********************************************************/ +int MothurMetastats::OrderPValues(int low, int high, vector& p, vector& order) { + try { + + if (low < high) { + int i = low+1; + int j = high; + int pivot = (low+high) / 2; + + swapElements(low, pivot, p, order); //puts pivot in final spot + + /* compare value */ + double key = p[low]; + + /* partition */ + while(i <= j) { + /* find member above ... */ + while((i <= high) && (p[i] <= key)) { i++; } + + /* find element below ... */ + while((j >= low) && (p[j] > key)) { j--; } + + if(i < j) { + swapElements(i, j, p, order); + } + } + + swapElements(low, j, p, order); + + /* recurse */ + OrderPValues(low, j-1, p, order); + OrderPValues(j+1, high, p, order); + } + + return 0; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "OrderPValues"); + exit(1); + } +} +/***********************************************************/ +int MothurMetastats::swapElements(int i, int j, vector& p, vector& order) { + try { + + double z = p[i]; + p[i] = p[j]; + p[j] = z; + + int temp = order[i]; + order[i] = order[j]; + order[j] = temp; + + return 0; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "swapElements"); + exit(1); + } +} +/***********************************************************/ +double MothurMetastats::smoothSpline(vector& x, vector& y, int df) { + try { + + double result = 0.0; + int n = x.size(); + vector w(n, 1); + double* xb = new double[n]; + double* yb = new double[n]; + double* wb = new double[n]; + double yssw = 0.0; + for (int i = 0; i < n; i++) { + wb[i] = w[i]; + yb[i] = w[i]*y[i]; + yssw += (w[i] * y[i] * y[i]) - wb[i] * (yb[i] * yb[i]); + xb[i] = (x[i] - x[0]) / (x[n-1] - x[0]); + } + + vector knot = sknot1(xb, n); + int nk = knot.size() - 4; + + double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; int maxit = 500; + int ispar = 0; int icrit = 3; double dofoff = 3.0; + double penalty = 1.0; + int ld4 = 4; int isetup = 0; int ldnk = 1; int ier; double spar = 1.0; double crit; + + double* knotb = new double[knot.size()]; + double* coef1 = new double[nk]; + double* lev1 = new double[n]; + double* sz1 = new double[n]; + for (int i = 0; i < knot.size(); i++) { knotb[i] = knot[i]; } + + Spline spline; + spline.sbart(&penalty, &dofoff, &xb[0], &yb[0], &wb[0], &yssw, &n, &knotb[0], &nk, &coef1[0], &sz1[0], &lev1[0], &crit, + &icrit, &spar, &ispar, &maxit, &low, &high, &tol, &eps, &isetup, &ld4, &ldnk, &ier); + + result = coef1[nk-1]; + + //free memory + delete [] xb; + delete [] yb; + delete [] wb; + delete [] knotb; + delete [] coef1; + delete [] lev1; + delete [] sz1; + + return result; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "smoothSpline"); + exit(1); + } +} +/***********************************************************/ +vector MothurMetastats::sknot1(double* x, int n) { + try { + vector knots; + int nk = nkn(n); + + //R equivalent - rep(x[1L], 3L) + knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]); + + //generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk) + vector indexes = getSequence(0, n-1, nk); + for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); } + + //R equivalent - rep(x[n], 3L) + knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]); + + return knots; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "sknot1"); + exit(1); + } +} +/***********************************************************/ +vector MothurMetastats::getSequence(int start, int end, int length) { + try { + vector sequence; + double increment = (end-start) / (double) (length-1); + + sequence.push_back(start); + for (int i = 1; i < length-1; i++) { + sequence.push_back(int(i*increment)); + } + sequence.push_back(end); + + return sequence; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "getSequence"); + exit(1); + } +} +/***********************************************************/ +//not right, havent fully figured out the variable types yet... +int MothurMetastats::nkn(int n) { + try { + + if (n < 50) { return n; } + else { + double a1 = log2(50); + double a2 = log2(100); + double a3 = log2(140); + double a4 = log2(200); + + if (n < 200) { + return (int)pow(2.0, (a1 + (a2 - a1) * (n - 50)/(float)150)); + }else if (n < 800) { + return (int)pow(2.0, (a2 + (a3 - a2) * (n - 200)/(float)600)); + }else if (n < 3200) { + return (int)pow(2.0, (a3 + (a4 - a3) * (n - 800)/(float)2400)); + }else { + return (int)pow((double)(200 + (n - 3200)), 0.2); + } + } + + return 0; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "nkn"); + exit(1); + } +} +/***********************************************************/ + + +