X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=mothurmetastats.cpp;h=56789736e5b0d8d9bbf273baa4a07c36efb86ec8;hp=ed0aaef8b4a8025baa4619d7199a1b2033131152;hb=df7e3ff9f68ef157b0328a2d353c3258c5d45d89;hpb=19fcbbdba99658f5eca244803280f9ee7f9f6607 diff --git a/mothurmetastats.cpp b/mothurmetastats.cpp index ed0aaef..5678973 100644 --- a/mothurmetastats.cpp +++ b/mothurmetastats.cpp @@ -9,6 +9,7 @@ #include "mothurmetastats.h" #include "mothurfisher.h" +#include "spline.h" /***********************************************************/ MothurMetastats::MothurMetastats(double t, int n) { @@ -23,210 +24,184 @@ MothurMetastats::MothurMetastats(double t, int n) { } } /***********************************************************/ -MothurMetastats::~MothurMetastats() { - try { - - - }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "~MothurMetastats"); - exit(1); - } -} +MothurMetastats::~MothurMetastats() {} /***********************************************************/ -//main metastats function -int MothurMetastats::runMetastats(string outputFileName, vector< vector >& data, int secondGroupingStart) { - try { - int bflag = 0; - row = data.size(); //numBins + //main metastats function +int MothurMetastats::runMetastats(string outputFileName, vector< vector >& data, int secGroupingStart) { + try { + row = data.size(); //numBins column = data[0].size(); //numGroups in subset - int size = row*column; - - //consistent with original, but this should never be true - if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; } - - //Initialize the matrices - vector pmatrix; pmatrix.resize(size, 0.0); - vector permuted; permuted.resize(size, 0.0); - vector< vector > storage; storage.resize(row); - for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); } - - //Produces the sum of each column - vector total; total.resize(column, 0.0); - vector ratio; ratio.resize(column, 0.0); - double total1 = 0.0; double total2 = 0.0; - - //total[i] = total abundance for group[i] + secondGroupingStart = secGroupingStart; //g + + vector< vector > Pmatrix; Pmatrix.resize(row); + for (int i = 0; i < row; i++) { Pmatrix[i].resize(column, 0.0); } // the relative proportion matrix + vector< vector > C1; C1.resize(row); + for (int i = 0; i < row; i++) { C1[i].resize(3, 0.0); } // statistic profiles for class1 and class 2 + vector< vector > C2; C2.resize(row); // mean[1], variance[2], standard error[3] + for (int i = 0; i < row; i++) { C2[i].resize(3, 0.0); } + vector T_statistics; T_statistics.resize(row, 1); // a place to store the true t-statistics + vector pvalues; pvalues.resize(row, 1); // place to store pvalues + vector qvalues; qvalues.resize(row, 1); // stores qvalues + + //************************************* + // convert to proportions + // generate Pmatrix + //************************************* + vector totals; totals.resize(column, 0); // sum of columns + //total[i] = total abundance for group[i] for (int i = 0; i < column; i++) { for (int j = 0; j < row; j++) { - total[i] += data[j][i]; + totals[i] += data[j][i]; } - } - - //total for first grouping - for (int i = 0; i < (secondGroupingStart-1); i++) { total1 += total[i]; } - - //total for second grouping - for (int i = (secondGroupingStart-1); i < column; i++) { total2 += total[i]; } - - //Creates the ratios by first finding the minimum of totals - double min = total[0]; - for (int i = 0; i < total.size(); i++) { - if (total[i] < min) { min = total[i]; } - } - - //sanity check - if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; } - - //Ratio time... - for(int i = 0; i < ratio.size(); i++){ ratio[i] = total[i] / min; } - - //Change matrix into an array as received by R for compatibility - kept to be consistent with original - int count = 0; - for(int i = 0; i < column; i++){ - for(int j = 0; j < row; j++){ - pmatrix[count]=data[j][i]; - count++; + } + + for (int i = 0; i < column; i++) { + for (int j = 0; j < row; j++) { + Pmatrix[j][i] = data[j][i]/totals[i]; + } - } - - if(row == 1){ - for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; } - }else { - count = 0; int j=-1; - - for (int i=0; i < size; i++) { - if (count % row == 0) { j++; } - pmatrix[i] /= ratio[j]; - count++; - } - } - - vector permuted_ttests; permuted_ttests.resize(row, 0.0); - vector pvalues; pvalues.resize(row, 0.0); - vector tinitial; tinitial.resize(row, 0.0); - - if (m->control_pressed) { return 1; } - - //Find the initial values for the matrix. - start(pmatrix, secondGroupingStart, tinitial, storage); - - if (m->control_pressed) { return 1; } - - // Start the calculations. - if ( (column == 2) || ((secondGroupingStart-1) < 8) || ((column-secondGroupingStart+1) < 8) ){ - - vector fish; fish.resize(row, 0.0); + } + + //#******************************************************************************** + //# ************************** STATISTICAL TESTING ******************************** + //#******************************************************************************** + + if (column == 2){ //# then we have a two sample comparison + //#************************************************************ + //# generate p values fisher's exact test + //#************************************************************ + double total1, total2; + //total for first grouping + for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; } + + //total for second grouping + for (int i = secondGroupingStart; i < column; i++) { total2 += totals[i]; } + + vector fish; fish.resize(row, 0.0); vector fish2; fish2.resize(row, 0.0); - + for(int i = 0; i < row; i++){ - for(int j = 0; j < (secondGroupingStart-1); j++) { fish[i] += data[i][j]; } - for(int j = (secondGroupingStart-1); j < column; j++) { fish2[i] += data[i][j]; } + for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; } + for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; } - //vector tempData; tempData.resize(4, 0.0); double f11, f12, f21, f22; f11 = fish[i]; f12 = fish2[i]; f21 = total1 - fish[i]; f22 = total2 - fish2[i]; - double pre = 0.0; - MothurFisher fisher; - pre = fisher.fexact(f11, f12, f21, f22); - + double pre = fisher.fexact(f11, f12, f21, f22); + if (pre > 0.999999999) { pre = 1.0; } + if (m->control_pressed) { return 1; } - if (pre > 0.999999999) { pre = 1.0; } - storage[i][8] = pre; pvalues[i] = pre; } - - }else { - - testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues); - - if (m->control_pressed) { return 1; } - - // Checks to make sure the matrix isn't sparse. - vector sparse; sparse.resize(row, 0.0); - vector sparse2; sparse2.resize(row, 0.0); - - int c = 0; - + + //#************************************* + //# calculate q values from p values + //#************************************* + qvalues = calc_qvalues(pvalues); + + }else { //we have multiple subjects per population + + //#************************************* + //# generate statistics mean, var, stderr + //#************************************* + for(int i = 0; i < row; i++){ // for each taxa + //# find the mean of each group + double g1Total = 0.0; double g2Total = 0.0; + for (int j = 0; j < secondGroupingStart; j++) { g1Total += Pmatrix[i][j]; } + C1[i][0] = g1Total/(double)(secondGroupingStart); + for (int j = secondGroupingStart; j < column; j++) { g2Total += Pmatrix[i][j]; } + C2[i][0] = g2Total/(double)(column-secondGroupingStart); + + //# find the variance of each group + double g1Var = 0.0; double g2Var = 0.0; + for (int j = 0; j < secondGroupingStart; j++) { g1Var += pow((Pmatrix[i][j]-C1[i][0]), 2); } + C1[i][1] = g1Var/(double)(secondGroupingStart-1); + for (int j = secondGroupingStart; j < column; j++) { g2Var += pow((Pmatrix[i][j]-C2[i][0]), 2); } + C2[i][1] = g2Var/(double)(column-secondGroupingStart-1); + + //# find the std error of each group -std err^2 (will change to std err at end) + C1[i][2] = C1[i][1]/(double)(secondGroupingStart); + C2[i][2] = C2[i][1]/(double)(column-secondGroupingStart); + } + + //#************************************* + //# two sample t-statistics + //#************************************* + for(int i = 0; i < row; i++){ // # for each taxa + double xbar_diff = C1[i][0] - C2[i][0]; + double denom = sqrt(C1[i][2] + C2[i][2]); + T_statistics[i] = xbar_diff/denom; // calculate two sample t-statistic + } + + /*for (int i = 0; i < row; i++) { + for (int j = 0; j < 3; j++) { + cout << "C1[" << i+1 << "," << j+1 << "]=" << C1[i][j] << ";" << endl; + cout << "C2[" << i+1 << "," << j+1 << "]=" << C2[i][j] << ";" << endl; + } + cout << "T_statistics[" << i+1 << "]=" << T_statistics[i] << ";" << endl; + }*/ + //#************************************* + //# generate initial permuted p-values + //#************************************* + pvalues = permuted_pvalues(Pmatrix, T_statistics, data); + + //#************************************* + //# generate p values for sparse data + //# using fisher's exact test + //#************************************* + double total1, total2; + //total for first grouping + for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; } + + //total for second grouping + for (int i = secondGroupingStart; i < column; i++) { total2 += totals[i]; } + + vector fish; fish.resize(row, 0.0); + vector fish2; fish2.resize(row, 0.0); + for(int i = 0; i < row; i++){ - for(int j = 0; j < (secondGroupingStart-1); j++){ sparse[i] += data[i][j]; } - if(sparse[i] < (double)(secondGroupingStart-1)){ c++; } + for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; } + for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; } + + if ((fish[1] < secondGroupingStart) && (fish2[i] < (column-secondGroupingStart))) { + double f11, f12, f21, f22; + f11 = fish[i]; + f12 = fish2[i]; + f21 = total1 - fish[i]; + f22 = total2 - fish2[i]; - // ?<= for col - for(int j = (secondGroupingStart-1); j < column; j++){ sparse2[i] += data[i][j]; } - if( (sparse2[i] < (double)(column-secondGroupingStart+1))) { c++; } + MothurFisher fisher; + double pre = fisher.fexact(f11, f12, f21, f22); + if (pre > 0.999999999) { pre = 1.0; } + + if (m->control_pressed) { return 1; } - if (c == 2) { - c=0; - double f11,f12,f21,f22; - - f11=sparse[i]; sparse[i]=0; - f12=sparse2[i]; sparse2[i]=0; - f21 = total1 - f11; - f22 = total2 - f12; - - double pre = 0.0; - - MothurFisher fisher; - pre = fisher.fexact(f11, f12, f21, f22); - - if (m->control_pressed) { return 1; } - - if (pre > 0.999999999){ - pre = 1.0; - } - - storage[i][8] = pre; - pvalues[i] = pre; - } + pvalues[i] = pre; + } } - - bflag = 1; - } - // Calculates the mean of counts (not normalized) - vector< vector > temp; temp.resize(row); - for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); } - - for (int j = 0; j < row; j++){ - if (m->control_pressed) { return 1; } - - for (int i = 1; i <= (secondGroupingStart-1); i++){ temp[j][0] += data[j][i-1]; } - temp[j][0] /= (double)(secondGroupingStart-1); - - for(int i = secondGroupingStart; i <= column; i++){ temp[j][1] += data[j][i-1]; } - temp[j][1] /= (double)(column-secondGroupingStart+1); - } - - for(int i = 0; i < row; i++){ - if (m->control_pressed) { return 1; } - - storage[i][3]=temp[i][0]; - storage[i][7]=temp[i][1]; - storage[i][8]=pvalues[i]; - } - - // BACKUP checks - cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); - for (int i = 0; i < row; i++){ - - if (m->control_pressed) { return 1; } - - if(pvalues[i] < threshold){ - m->mothurOut("Feature " + toString((i+1)) + " is significant, p = "); - cout << pvalues[i]; - m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine(); - } - } - - // And now we write the files to a text file. + //#************************************* + //# calculate q values from p values + //#************************************* + qvalues = calc_qvalues(pvalues); + + //#************************************* + //# convert stderr^2 to std error + //#************************************* + for(int i = 0; i < row; i++){ + C1[i][2] = sqrt(C1[i][2]); + C2[i][2] = sqrt(C2[i][2]); + } + } + + // And now we write the files to a text file. struct tm *local; time_t t; t = time(NULL); local = localtime(&t); @@ -234,263 +209,396 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector ofstream out; m->openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - + out << "Local time and date of test: " << asctime(local) << endl; out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl; - if (bflag == 1){ out << numPermutations << " permutations" << endl << endl; } + out << numPermutations << " permutations" << endl << endl; //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293 - out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n"; + out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tp-value\tq-value\n"; for(int i = 0; i < row; i++){ if (m->control_pressed) { out.close(); return 0; } - out << (i+1); - for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; } - out << endl; + //if there are binlabels use them otherwise count. + if (m->binLabelsInFile.size() == row) { out << m->binLabelsInFile[i] << '\t'; } + else { out << (i+1) << '\t'; } + + out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << '\t' << qvalues[i] << endl; } out << endl << endl; out.close(); - return 0; - - }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "runMetastats"); - exit(1); - } + + + return 0; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "runMetastats"); + exit(1); + } +} +/***********************************************************/ +vector MothurMetastats::permuted_pvalues(vector< vector >& Imatrix, vector& tstats, vector< vector >& Fmatrix) { + try { + //# matrix stores tstats for each taxa(row) for each permuted trial(column) + vector ps; ps.resize(row, 0.0); //# to store the pvalues + vector< vector > permuted_ttests; permuted_ttests.resize(numPermutations); + for (int i = 0; i < numPermutations; i++) { permuted_ttests[i].resize(row, 0.0); } + + //# calculate null version of tstats using B permutations. + for (int i = 0; i < numPermutations; i++) { + permuted_ttests[i] = permute_and_calc_ts(Imatrix); + } + + //# calculate each pvalue using the null ts + if ((secondGroupingStart) < 8 || (column-secondGroupingStart) < 8){ + vector< vector > cleanedpermuted_ttests; cleanedpermuted_ttests.resize(numPermutations); //# the array pooling just the frequently observed ts + //# then pool the t's together! + //# count how many high freq taxa there are + int hfc = 1; + for (int i = 0; i < row; i++) { // # for each taxa + double group1Total = 0.0; double group2Total = 0.0; + for(int j = 0; j < secondGroupingStart; j++) { group1Total += Fmatrix[i][j]; } + for(int j = secondGroupingStart; j < column; j++) { group2Total += Fmatrix[i][j]; } + + if (group1Total >= secondGroupingStart || group2Total >= (column-secondGroupingStart)){ + hfc++; + for (int j = 0; j < numPermutations; j++) { cleanedpermuted_ttests[j].push_back(permuted_ttests[j][i]); } + } + } + + //#now for each taxa + for (int i = 0; i < row; i++) { + //number of cleanedpermuted_ttests greater than tstat[i] + int numGreater = 0; + for (int j = 0; j < numPermutations; j++) { + for (int k = 0; k < hfc; k++) { + if (cleanedpermuted_ttests[j][k] > abs(tstats[i])) { numGreater++; } + } + } + + ps[i] = (1/(double)(numPermutations*hfc))*numGreater; + } + }else{ + for (int i = 0; i < row; i++) { + //number of permuted_ttests[i] greater than tstat[i] //(sum(permuted_ttests[i,] > abs(tstats[i]))+1) + int numGreater = 1; + for (int j = 0; j < numPermutations; j++) { if (permuted_ttests[j][i] > abs(tstats[i])) { numGreater++; } } + ps[i] = (1/(double)(numPermutations+1))*numGreater; + } + } + + return ps; + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "permuted_pvalues"); + exit(1); + } +} +/***********************************************************/ +vector MothurMetastats::permute_and_calc_ts(vector< vector >& Imatrix) { + try { + vector< vector > permutedMatrix = Imatrix; + + //randomize columns, ie group abundances. + for (int i = 0; i < permutedMatrix.size(); i++) { random_shuffle(permutedMatrix[i].begin(), permutedMatrix[i].end()); } + + //calc ts + vector< vector > C1; C1.resize(row); + for (int i = 0; i < row; i++) { C1[i].resize(3, 0.0); } // statistic profiles for class1 and class 2 + vector< vector > C2; C2.resize(row); // mean[1], variance[2], standard error[3] + for (int i = 0; i < row; i++) { C2[i].resize(3, 0.0); } + vector Ts; Ts.resize(row, 0.0); // a place to store the true t-statistics + + //#************************************* + //# generate statistics mean, var, stderr + //#************************************* + for(int i = 0; i < row; i++){ // for each taxa + //# find the mean of each group + double g1Total = 0.0; double g2Total = 0.0; + for (int j = 0; j < secondGroupingStart; j++) { g1Total += permutedMatrix[i][j]; } + C1[i][0] = g1Total/(double)(secondGroupingStart); + for (int j = secondGroupingStart; j < column; j++) { g2Total += permutedMatrix[i][j]; } + C2[i][0] = g2Total/(double)(column-secondGroupingStart); + + //# find the variance of each group + double g1Var = 0.0; double g2Var = 0.0; + for (int j = 0; j < secondGroupingStart; j++) { g1Var += pow((permutedMatrix[i][j]-C1[i][0]), 2); } + C1[i][1] = g1Var/(double)(secondGroupingStart-1); + for (int j = secondGroupingStart; j < column; j++) { g2Var += pow((permutedMatrix[i][j]-C2[i][0]), 2); } + C2[i][1] = g2Var/(double)(column-secondGroupingStart-1); + + //# find the std error of each group -std err^2 (will change to std err at end) + C1[i][2] = C1[i][1]/(double)(secondGroupingStart); + C2[i][2] = C2[i][1]/(double)(column-secondGroupingStart); + } + + //#************************************* + //# two sample t-statistics + //#************************************* + for(int i = 0; i < row; i++){ // # for each taxa + double xbar_diff = C1[i][0] - C2[i][0]; + double denom = sqrt(C1[i][2] + C2[i][2]); + Ts[i] = abs(xbar_diff/denom); // calculate two sample t-statistic + } + + return Ts; + + + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "permuted_ttests"); + exit(1); + } } /***********************************************************/ -//Find the initial values for the matrix -int MothurMetastats::start(vector& Imatrix, int secondGroupingStart, vector& initial, vector< vector >& storage) { +vector MothurMetastats::calc_qvalues(vector& pValues) { try { - int a = row; a*=4; + /* cout << "x <- c(" << pValues[0]; + for (int l = 1; l < pValues.size(); l++){ + cout << ", " << pValues[l]; + } + cout << ")\n";*/ + + int numRows = pValues.size(); + vector qvalues(numRows, 0.0); + + //fill lambdas with 0.00, 0.01, 0.02... 0.95 + vector lambdas(96, 0); + for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; } - double xbardiff = 0.0; double denom = 0.0; - vector store; store.resize(a, 0.0); - vector tool; tool.resize(a, 0.0); - vector< vector > C1; C1.resize(row); - for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); } - vector< vector > C2; C2.resize(row); - for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); } + vector pi0_hat(lambdas.size(), 0); - meanvar(Imatrix, secondGroupingStart, store); + //calculate pi0_hat + for (int l = 0; l < lambdas.size(); l++){ // for each lambda value + int count = 0; + for (int i = 0; i < numRows; i++){ // for each p-value in order + if (pValues[i] > lambdas[l]){ count++; } + } + pi0_hat[l] = count/(double)(numRows*(1-lambdas[l])); + } - if (m->control_pressed) { return 0; } + double pi0 = smoothSpline(lambdas, pi0_hat, 3); - //copy store into tool - tool = store; + //order p-values + vector ordered_qs = qvalues; + vector ordered_ps(pValues.size(), 0); + for (int i = 1; i < ordered_ps.size(); i++) { ordered_ps[i] = ordered_ps[i-1] + 1; } + vector tempPvalues = pValues; + OrderPValues(0, numRows-1, tempPvalues, ordered_ps); - for (int i = 0; i < row; i++){ - C1[i][0]=tool[i]; //mean group 1 - storage[i][0]=C1[i][0]; - C1[i][1]=tool[i+row+row]; // var group 1 - storage[i][1]=C1[i][1]; - C1[i][2]=C1[i][1]/(secondGroupingStart-1); - storage[i][2]=sqrt(C1[i][2]); - - C2[i][0]=tool[i+row]; // mean group 2 - storage[i][4]=C2[i][0]; - C2[i][1]=tool[i+row+row+row]; // var group 2 - storage[i][5]=C2[i][1]; - C2[i][2]=C2[i][1]/(column-secondGroupingStart+1); - storage[i][6]=sqrt(C2[i][2]); + ordered_qs[numRows-1] = min((pValues[ordered_ps[numRows-1]]*pi0), 1.0); + for (int i = (numRows-2); i >= 0; i--){ + double p = pValues[ordered_ps[i]]; + double temp = p*numRows*pi0/(double)(i+1); + + ordered_qs[i] = min(temp, ordered_qs[i+1]); } - if (m->control_pressed) { return 0; } + //re-distribute calculated qvalues to appropriate rows + for (int i = 0; i < numRows; i++){ + qvalues[ordered_ps[i]] = ordered_qs[i]; + } - for (int i = 0; i < row; i++){ - xbardiff = C1[i][0]-C2[i][0]; - denom = sqrt(C1[i][2]+C2[i][2]); - initial[i]=fabs(xbardiff/denom); - } - - return 0; + return qvalues; }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "start"); + m->errorOut(e, "MothurMetastats", "calc_qvalues"); exit(1); - } + } } /***********************************************************/ -int MothurMetastats::meanvar(vector& pmatrix, int secondGroupingStart, vector& store) { +int MothurMetastats::OrderPValues(int low, int high, vector& p, vector& order) { try { - vector temp; temp.resize(row, 0.0); - vector temp2; temp2.resize(row, 0.0); - vector var; var.resize(row, 0.0); - vector var2; var2.resize(row, 0.0); - - double a = secondGroupingStart-1; - double b = column - a; - int m = a * row; - int n = row * column; - - for (int i = 0; i < m; i++) { temp[i%row] += pmatrix[i]; } - for (int i = 0; i < n; i++) { temp2[i%row]+= pmatrix[i]; } - for (int i = 0; i < row; i++) { temp2[i] -= temp[i]; } - for (int i = 0; i <= row-1;i++) { - store[i] = temp[i]/a; - store[i+row]=temp2[i]/b; - } - - //That completes the mean calculations. - - for (int i = 0; i < m; i++) { var[i%row] += pow((pmatrix[i]-store[i%row]),2); } - for (int i = m; i < n; i++) { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); } - for (int i = 0; i <= row-1; i++){ - store[i+2*row]=var[i]/(a-1); - store[i+3*row]=var2[i]/(b-1); - } - // That completes var calculations. + if (low < high) { + int i = low+1; + int j = high; + int pivot = (low+high) / 2; + + swapElements(low, pivot, p, order); //puts pivot in final spot + + /* compare value */ + double key = p[low]; + + /* partition */ + while(i <= j) { + /* find member above ... */ + while((i <= high) && (p[i] <= key)) { i++; } + + /* find element below ... */ + while((j >= low) && (p[j] > key)) { j--; } + + if(i < j) { + swapElements(i, j, p, order); + } + } + + swapElements(low, j, p, order); + + /* recurse */ + OrderPValues(low, j-1, p, order); + OrderPValues(j+1, high, p, order); + } return 0; }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "meanvar"); + m->errorOut(e, "MothurMetastats", "OrderPValues"); exit(1); - } + } } /***********************************************************/ -int MothurMetastats::testp(vector& permuted_ttests, vector& permuted, vector& Imatrix, int secondGroupingStart, vector& Tinitial, vector& ps) { +int MothurMetastats::swapElements(int i, int j, vector& p, vector& order) { try { - vector Tvalues; Tvalues.resize(row, 0.0); - vector counter; counter.resize(row, 0.0); - int a, b, n; + double z = p[i]; + p[i] = p[j]; + p[j] = z; - a = numPermutations; - b = row; - n = a*b; - - for (int j = 1; j <= row; j++) { - if (m->control_pressed) { return 0; } - permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter); - } - - for(int j = 0; j < row; j++) { - if (m->control_pressed) { return 0; } - ps[j] = ((counter[j]+1)/(double)(a+1)); - } + int temp = order[i]; + order[i] = order[j]; + order[j] = temp; return 0; }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "testp"); + m->errorOut(e, "MothurMetastats", "swapElements"); exit(1); - } -} + } +} /***********************************************************/ -int MothurMetastats::permute_matrix(vector& Imatrix, vector& permuted, int secondGroupingStart, vector& trial_ts, vector& Tinitial, vector& counter1){ +double MothurMetastats::smoothSpline(vector& x, vector& y, int df) { try { - - vector y; y.resize(column, 0); - for (int i = 1; i <= column; i++){ y[i-1] = i; } - - permute_array(y); - - int f = 0; int c = 0; int k = 0; - for (int i = 0; i < column; i++){ - - if (m->control_pressed) { return 0; } - - f = y[i]; //column number - c = 1; - c *= (f-1); - c *= row; - if (f == 1){ c = 0; } // starting value position in the Imatrix - - for(int j = 1; j <= row; j++){ - permuted[k] = Imatrix[c]; - c++; k++; - } + + double result = 0.0; + int n = x.size(); + vector w(n, 1); + double* xb = new double[n]; + double* yb = new double[n]; + double* wb = new double[n]; + double yssw = 0.0; + for (int i = 0; i < n; i++) { + wb[i] = w[i]; + yb[i] = w[i]*y[i]; + yssw += (w[i] * y[i] * y[i]) - wb[i] * (yb[i] * yb[i]); + xb[i] = (x[i] - x[0]) / (x[n-1] - x[0]); } - calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1); - - return 0; + vector knot = sknot1(xb, n); + int nk = knot.size() - 4; + + double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; int maxit = 500; + int ispar = 0; int icrit = 3; double dofoff = 3.0; + double penalty = 1.0; + int ld4 = 4; int isetup = 0; int ldnk = 1; int ier; double spar = 1.0; double crit; + + double* knotb = new double[knot.size()]; + double* coef1 = new double[nk]; + double* lev1 = new double[n]; + double* sz1 = new double[n]; + for (int i = 0; i < knot.size(); i++) { knotb[i] = knot[i]; } + + Spline spline; + spline.sbart(&penalty, &dofoff, &xb[0], &yb[0], &wb[0], &yssw, &n, &knotb[0], &nk, &coef1[0], &sz1[0], &lev1[0], &crit, + &icrit, &spar, &ispar, &maxit, &low, &high, &tol, &eps, &isetup, &ld4, &ldnk, &ier); + + result = coef1[nk-1]; + + //free memory + delete [] xb; + delete [] yb; + delete [] wb; + delete [] knotb; + delete [] coef1; + delete [] lev1; + delete [] sz1; + + return result; }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "permute_matrix"); + m->errorOut(e, "MothurMetastats", "smoothSpline"); exit(1); - } + } } /***********************************************************/ -int MothurMetastats::permute_array(vector& array) { +vector MothurMetastats::sknot1(double* x, int n) { try { - static int seeded = 0; + vector knots; + int nk = nkn(n); - if (! seeded) { - seeded = 1; - srand(time(NULL)); - } + //R equivalent - rep(x[1L], 3L) + knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]); - for (int i = 0; i < array.size(); i++) { - if (m->control_pressed) { return 0; } - - int selection = rand() % (array.size() - i); - int tmp = array[i + selection]; - array[i + selection] = array[i]; - array[i] = tmp; - } + //generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk) + vector indexes = getSequence(0, n-1, nk); + for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); } - return 0; + //R equivalent - rep(x[n], 3L) + knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]); + + return knots; }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "permute_array"); + m->errorOut(e, "MothurMetastats", "sknot1"); exit(1); - } + } } /***********************************************************/ -int MothurMetastats::calc_twosample_ts(vector& Pmatrix, int secondGroupingStart, vector& Ts, vector& Tinitial, vector& counter) { +vector MothurMetastats::getSequence(int start, int end, int length) { try { - int a = row * 4; + vector sequence; + double increment = (end-start) / (double) (length-1); - vector< vector > C1; C1.resize(row); - for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); } - vector< vector > C2; C2.resize(row); - for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); } - vector storage; storage.resize(row, 0.0); - vector tool; tool.resize(row, 0.0); - double xbardiff = 0.0; double denom = 0.0; + sequence.push_back(start); + for (int i = 1; i < length-1; i++) { + sequence.push_back(int(i*increment)); + } + sequence.push_back(end); - meanvar(Pmatrix, secondGroupingStart, storage); + return sequence; - for(int i = 0;i <= (a-1); i++) { - if (m->control_pressed) { return 0; } - tool[i] = storage[i]; - } + }catch(exception& e) { + m->errorOut(e, "MothurMetastats", "getSequence"); + exit(1); + } +} +/***********************************************************/ +//not right, havent fully figured out the variable types yet... +int MothurMetastats::nkn(int n) { + try { - for (int i = 0; i < row; i++){ - if (m->control_pressed) { return 0; } - C1[i][0]=tool[i]; - C1[i][1]=tool[i+row+row]; - C1[i][2]=C1[i][1]/(secondGroupingStart-1); + if (n < 50) { return n; } + else { + double a1 = log2(50); + double a2 = log2(100); + double a3 = log2(140); + double a4 = log2(200); - C2[i][0]=tool[i+row]; - C2[i][1]=tool[i+row+row+row]; // var group 2 - C2[i][2]=C2[i][1]/(column-secondGroupingStart+1); - } - - for (int i = 0; i < row; i++){ - if (m->control_pressed) { return 0; } - xbardiff = C1[i][0]-C2[i][0]; - denom = sqrt(C1[i][2]+C2[i][2]); - Ts[i]=fabs(xbardiff/denom); - if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place - counter[i]++; + if (n < 200) { + return (int)pow(2.0, (a1 + (a2 - a1) * (n - 50)/(float)150)); + }else if (n < 800) { + return (int)pow(2.0, (a2 + (a3 - a2) * (n - 200)/(float)600)); + }else if (n < 3200) { + return (int)pow(2.0, (a3 + (a4 - a3) * (n - 800)/(float)2400)); + }else { + return (int)pow((double)(200 + (n - 3200)), 0.2); } } - + return 0; }catch(exception& e) { - m->errorOut(e, "MothurMetastats", "calc_twosample_ts"); + m->errorOut(e, "MothurMetastats", "nkn"); exit(1); } } /***********************************************************/ + + +