#include "mothurmetastats.h"
#include "mothurfisher.h"
+#include "spline.h"
/***********************************************************/
MothurMetastats::MothurMetastats(double t, int n) {
/***********************************************************/
MothurMetastats::~MothurMetastats() {}
/***********************************************************/
-//main metastats function
-int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secondGroupingStart) {
- try {
- int bflag = 0;
- row = data.size(); //numBins
+ //main metastats function
+int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secGroupingStart) {
+ try {
+ row = data.size(); //numBins
column = data[0].size(); //numGroups in subset
- int size = row*column;
-
- //consistent with original, but this should never be true
- if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; }
-
- //Initialize the matrices
- vector<double> pmatrix; pmatrix.resize(size, 0.0);
- vector<double> permuted; permuted.resize(size, 0.0);
- vector< vector<double> > storage; storage.resize(row);
- for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); }
-
- //Produces the sum of each column
- vector<double> total; total.resize(column, 0.0);
- vector<double> ratio; ratio.resize(column, 0.0);
- double total1 = 0.0; double total2 = 0.0;
-
- //total[i] = total abundance for group[i]
+ secondGroupingStart = secGroupingStart; //g
+
+ vector< vector<double> > Pmatrix; Pmatrix.resize(row);
+ for (int i = 0; i < row; i++) { Pmatrix[i].resize(column, 0.0); } // the relative proportion matrix
+ vector< vector<double> > C1; C1.resize(row);
+ for (int i = 0; i < row; i++) { C1[i].resize(3, 0.0); } // statistic profiles for class1 and class 2
+ vector< vector<double> > C2; C2.resize(row); // mean[1], variance[2], standard error[3]
+ for (int i = 0; i < row; i++) { C2[i].resize(3, 0.0); }
+ vector<double> T_statistics; T_statistics.resize(row, 1); // a place to store the true t-statistics
+ vector<double> pvalues; pvalues.resize(row, 1); // place to store pvalues
+ vector<double> qvalues; qvalues.resize(row, 1); // stores qvalues
+
+ //*************************************
+ // convert to proportions
+ // generate Pmatrix
+ //*************************************
+ vector<double> totals; totals.resize(column, 0); // sum of columns
+ //total[i] = total abundance for group[i]
for (int i = 0; i < column; i++) {
for (int j = 0; j < row; j++) {
- total[i] += data[j][i];
+ totals[i] += data[j][i];
}
- }
-
- //total for first grouping
- for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; }
-
- //total for second grouping
- for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; }
-
- //Creates the ratios by first finding the minimum of totals
- double min = total[0];
- for (int i = 0; i < total.size(); i++) {
- if (total[i] < min) { min = total[i]; }
- }
-
- //sanity check
- if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; }
-
- //Ratio time...
- for(int i = 0; i < ratio.size(); i++){ ratio[i] = total[i] / min; }
-
- //Change matrix into an array as received by R for compatibility - kept to be consistent with original
- int count = 0;
- for(int i = 0; i < column; i++){
- for(int j = 0; j < row; j++){
- pmatrix[count]=data[j][i];
- count++;
+ }
+
+ for (int i = 0; i < column; i++) {
+ for (int j = 0; j < row; j++) {
+ Pmatrix[j][i] = data[j][i]/totals[i];
+
}
- }
-
- if(row == 1){
- for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; }
- }else {
- count = 0; int j=-1;
-
- for (int i=0; i < size; i++) {
- if (count % row == 0) { j++; }
- pmatrix[i] /= ratio[j];
- count++;
- }
- }
-
- vector<double> permuted_ttests; permuted_ttests.resize(row, 0.0);
- vector<double> pvalues; pvalues.resize(row, 0.0);
- vector<double> tinitial; tinitial.resize(row, 0.0);
-
- if (m->control_pressed) { return 1; }
-
- //Find the initial values for the matrix.
- start(pmatrix, secondGroupingStart, tinitial, storage);
-
- if (m->control_pressed) { return 1; }
-
- // Start the calculations.
- if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){
-
- vector<double> fish; fish.resize(row, 0.0);
+ }
+
+ //#********************************************************************************
+ //# ************************** STATISTICAL TESTING ********************************
+ //#********************************************************************************
+
+ if (column == 2){ //# then we have a two sample comparison
+ //#************************************************************
+ //# generate p values fisher's exact test
+ //#************************************************************
+ double total1, total2;
+ //total for first grouping
+ for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; }
+
+ //total for second grouping
+ for (int i = secondGroupingStart; i < column; i++) { total2 += totals[i]; }
+
+ vector<double> fish; fish.resize(row, 0.0);
vector<double> fish2; fish2.resize(row, 0.0);
-
+
for(int i = 0; i < row; i++){
for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; }
for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; }
- //vector<double> tempData; tempData.resize(4, 0.0);
double f11, f12, f21, f22;
f11 = fish[i];
f12 = fish2[i];
f21 = total1 - fish[i];
f22 = total2 - fish2[i];
- double pre = 0.0;
-
MothurFisher fisher;
- pre = fisher.fexact(f11, f12, f21, f22);
-
+ double pre = fisher.fexact(f11, f12, f21, f22);
+ if (pre > 0.999999999) { pre = 1.0; }
+
if (m->control_pressed) { return 1; }
- if (pre > 0.999999999) { pre = 1.0; }
- storage[i][8] = pre;
pvalues[i] = pre;
}
-
- }else {
-
- testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues);
-
- if (m->control_pressed) { return 1; }
-
- // Checks to make sure the matrix isn't sparse.
- vector<double> sparse; sparse.resize(row, 0.0);
- vector<double> sparse2; sparse2.resize(row, 0.0);
-
- int c = 0;
-
+
+ //#*************************************
+ //# calculate q values from p values
+ //#*************************************
+ qvalues = calc_qvalues(pvalues);
+
+ }else { //we have multiple subjects per population
+
+ //#*************************************
+ //# generate statistics mean, var, stderr
+ //#*************************************
+ for(int i = 0; i < row; i++){ // for each taxa
+ //# find the mean of each group
+ double g1Total = 0.0; double g2Total = 0.0;
+ for (int j = 0; j < secondGroupingStart; j++) { g1Total += Pmatrix[i][j]; }
+ C1[i][0] = g1Total/(double)(secondGroupingStart);
+ for (int j = secondGroupingStart; j < column; j++) { g2Total += Pmatrix[i][j]; }
+ C2[i][0] = g2Total/(double)(column-secondGroupingStart);
+
+ //# find the variance of each group
+ double g1Var = 0.0; double g2Var = 0.0;
+ for (int j = 0; j < secondGroupingStart; j++) { g1Var += pow((Pmatrix[i][j]-C1[i][0]), 2); }
+ C1[i][1] = g1Var/(double)(secondGroupingStart-1);
+ for (int j = secondGroupingStart; j < column; j++) { g2Var += pow((Pmatrix[i][j]-C2[i][0]), 2); }
+ C2[i][1] = g2Var/(double)(column-secondGroupingStart-1);
+
+ //# find the std error of each group -std err^2 (will change to std err at end)
+ C1[i][2] = C1[i][1]/(double)(secondGroupingStart);
+ C2[i][2] = C2[i][1]/(double)(column-secondGroupingStart);
+ }
+
+ //#*************************************
+ //# two sample t-statistics
+ //#*************************************
+ for(int i = 0; i < row; i++){ // # for each taxa
+ double xbar_diff = C1[i][0] - C2[i][0];
+ double denom = sqrt(C1[i][2] + C2[i][2]);
+ T_statistics[i] = xbar_diff/denom; // calculate two sample t-statistic
+ }
+
+ /*for (int i = 0; i < row; i++) {
+ for (int j = 0; j < 3; j++) {
+ cout << "C1[" << i+1 << "," << j+1 << "]=" << C1[i][j] << ";" << endl;
+ cout << "C2[" << i+1 << "," << j+1 << "]=" << C2[i][j] << ";" << endl;
+ }
+ cout << "T_statistics[" << i+1 << "]=" << T_statistics[i] << ";" << endl;
+ }*/
+ //#*************************************
+ //# generate initial permuted p-values
+ //#*************************************
+ pvalues = permuted_pvalues(Pmatrix, T_statistics, data);
+
+ //#*************************************
+ //# generate p values for sparse data
+ //# using fisher's exact test
+ //#*************************************
+ double total1, total2;
+ //total for first grouping
+ for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; }
+
+ //total for second grouping
+ for (int i = secondGroupingStart; i < column; i++) { total2 += totals[i]; }
+
+ vector<double> fish; fish.resize(row, 0.0);
+ vector<double> fish2; fish2.resize(row, 0.0);
+
for(int i = 0; i < row; i++){
- for(int j = 0; j < secondGroupingStart; j++) { sparse[i] += data[i][j]; }
- if(sparse[i] < (double)secondGroupingStart) { c++; }
+ for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; }
+ for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; }
+
+ if ((fish[1] < secondGroupingStart) && (fish2[i] < (column-secondGroupingStart))) {
+ double f11, f12, f21, f22;
+ f11 = fish[i];
+ f12 = fish2[i];
+ f21 = total1 - fish[i];
+ f22 = total2 - fish2[i];
- // ?<= for col
- for(int j = secondGroupingStart; j < column; j++) { sparse2[i] += data[i][j]; }
- if( (sparse2[i] < (double)(column-secondGroupingStart))) { c++; }
+ MothurFisher fisher;
+ double pre = fisher.fexact(f11, f12, f21, f22);
+ if (pre > 0.999999999) { pre = 1.0; }
+
+ if (m->control_pressed) { return 1; }
- if (c == 2) {
- c=0;
- double f11,f12,f21,f22;
-
- f11=sparse[i]; sparse[i]=0;
- f12=sparse2[i]; sparse2[i]=0;
- f21 = total1 - f11;
- f22 = total2 - f12;
-
- double pre = 0.0;
-
- MothurFisher fisher;
- pre = fisher.fexact(f11, f12, f21, f22);
-
- if (m->control_pressed) { return 1; }
-
- if (pre > 0.999999999){
- pre = 1.0;
- }
-
- storage[i][8] = pre;
- pvalues[i] = pre;
- }
+ pvalues[i] = pre;
+ }
}
-
- bflag = 1;
- }
- // Calculates the mean of counts (not normalized)
- vector< vector<double> > temp; temp.resize(row);
- for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); }
-
- for (int j = 0; j < row; j++){
- if (m->control_pressed) { return 1; }
-
- for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; }
- temp[j][0] /= (double)secondGroupingStart;
-
- for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; }
- temp[j][1] /= (double)(column-secondGroupingStart);
- }
-
- for(int i = 0; i < row; i++){
- if (m->control_pressed) { return 1; }
-
- storage[i][3]=temp[i][0];
- storage[i][7]=temp[i][1];
- storage[i][8]=pvalues[i];
- }
- cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
- cout << "pvalues" << endl;
- for (int i = 0; i < row; i++){ if (pvalues[i] < 0.0000001) {
- pvalues[i] = 0.0;
- }cout << pvalues[i] << '\t'; }
- cout << endl;
- calc_qvalues(pvalues);
-
- // BACKUP checks
- cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
- for (int i = 0; i < row; i++){
-
- if (m->control_pressed) { return 1; }
-
- if(pvalues[i] < threshold){
- m->mothurOut("Feature " + toString((i+1)) + " is significant, p = ");
- cout << pvalues[i];
- m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
- }
- }
-
- // And now we write the files to a text file.
+ //#*************************************
+ //# calculate q values from p values
+ //#*************************************
+ qvalues = calc_qvalues(pvalues);
+
+ //#*************************************
+ //# convert stderr^2 to std error
+ //#*************************************
+ for(int i = 0; i < row; i++){
+ C1[i][2] = sqrt(C1[i][2]);
+ C2[i][2] = sqrt(C2[i][2]);
+ }
+ }
+
+ // And now we write the files to a text file.
struct tm *local;
time_t t; t = time(NULL);
local = localtime(&t);
ofstream out;
m->openOutputFile(outputFileName, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-
+
out << "Local time and date of test: " << asctime(local) << endl;
out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl;
- if (bflag == 1){ out << numPermutations << " permutations" << endl << endl; }
+ out << numPermutations << " permutations" << endl << endl;
//output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
//storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
- out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n";
+ out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tp-value\tq-value\n";
for(int i = 0; i < row; i++){
if (m->control_pressed) { out.close(); return 0; }
- out << (i+1);
- for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
- out << endl;
+ //if there are binlabels use them otherwise count.
+ if (m->binLabelsInFile.size() == row) { out << m->binLabelsInFile[i] << '\t'; }
+ else { out << (i+1) << '\t'; }
+
+ out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << '\t' << qvalues[i] << endl;
}
out << endl << endl;
out.close();
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "runMetastats");
- exit(1);
- }
-}
-/***********************************************************/
-//Find the initial values for the matrix
-int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vector<double>& initial, vector< vector<double> >& storage) {
- try {
-
- int a = row; a*=4;
-
- double xbardiff = 0.0; double denom = 0.0;
- vector<double> store; store.resize(a, 0.0);
- vector<double> tool; tool.resize(a, 0.0);
- vector< vector<double> > C1; C1.resize(row);
- for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
- vector< vector<double> > C2; C2.resize(row);
- for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
-
- meanvar(Imatrix, secondGroupingStart, store);
-
- if (m->control_pressed) { return 0; }
-
- //copy store into tool
- tool = store;
-
- for (int i = 0; i < row; i++){
- C1[i][0]=tool[i]; //mean group 1
- storage[i][0]=C1[i][0];
- C1[i][1]=tool[i+row+row]; // var group 1
- storage[i][1]=C1[i][1];
- C1[i][2]=C1[i][1]/(secondGroupingStart);
- storage[i][2]=sqrt(C1[i][2]);
-
- C2[i][0]=tool[i+row]; // mean group 2
- storage[i][4]=C2[i][0];
- C2[i][1]=tool[i+row+row+row]; // var group 2
- storage[i][5]=C2[i][1];
- C2[i][2]=C2[i][1]/(column-secondGroupingStart);
- storage[i][6]=sqrt(C2[i][2]);
- }
-
- if (m->control_pressed) { return 0; }
-
- for (int i = 0; i < row; i++){
- xbardiff = C1[i][0]-C2[i][0];
- denom = sqrt(C1[i][2]+C2[i][2]);
- initial[i]=fabs(xbardiff/denom);
- }
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "start");
- exit(1);
- }
-}
-/***********************************************************/
-int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, vector<double>& store) {
- try {
- vector<double> temp; temp.resize(row, 0.0);
- vector<double> temp2; temp2.resize(row, 0.0);
- vector<double> var; var.resize(row, 0.0);
- vector<double> var2; var2.resize(row, 0.0);
-
- double a = secondGroupingStart;
- double b = column - a;
- int m = a * row;
- int n = row * column;
-
- for (int i = 0; i < m; i++) { temp[i%row] += pmatrix[i]; }
- for (int i = 0; i < n; i++) { temp2[i%row]+= pmatrix[i]; }
- for (int i = 0; i < row; i++) { temp2[i] -= temp[i]; }
- for (int i = 0; i <= row-1;i++) {
- store[i] = temp[i]/a;
- store[i+row]=temp2[i]/b;
- }
-
- //That completes the mean calculations.
-
- for (int i = 0; i < m; i++) { var[i%row] += pow((pmatrix[i]-store[i%row]),2); }
- for (int i = m; i < n; i++) { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); }
- for (int i = 0; i <= row-1; i++){
- store[i+2*row]=var[i]/(a-1);
- store[i+3*row]=var2[i]/(b-1);
- }
-
- // That completes var calculations.
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "meanvar");
- exit(1);
- }
-}
-/***********************************************************/
-int MothurMetastats::testp(vector<double>& permuted_ttests, vector<double>& permuted, vector<double>& Imatrix, int secondGroupingStart, vector<double>& Tinitial, vector<double>& ps) {
- try {
-
- vector<double> Tvalues; Tvalues.resize(row, 0.0);
- vector<double> counter; counter.resize(row, 0.0);
- int a, b, n;
-
- a = numPermutations;
- b = row;
- n = a*b;
-
- for (int j = 1; j <= row; j++) {
- if (m->control_pressed) { return 0; }
- permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter);
- }
-
- for(int j = 0; j < row; j++) {
- if (m->control_pressed) { return 0; }
- ps[j] = ((counter[j]+1)/(double)(a+1));
- }
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "testp");
- exit(1);
- }
-}
-/***********************************************************/
-int MothurMetastats::permute_matrix(vector<double>& Imatrix, vector<double>& permuted, int secondGroupingStart, vector<double>& trial_ts, vector<double>& Tinitial, vector<double>& counter1){
- try {
-
- vector<int> y; y.resize(column, 0);
- for (int i = 1; i <= column; i++){ y[i-1] = i; }
-
- permute_array(y);
-
- int f = 0; int c = 0; int k = 0;
- for (int i = 0; i < column; i++){
-
- if (m->control_pressed) { return 0; }
-
- f = y[i]; //column number
- c = 1;
- c *= (f-1);
- c *= row;
- if (f == 1){ c = 0; } // starting value position in the Imatrix
-
- for(int j = 1; j <= row; j++){
- permuted[k] = Imatrix[c];
- c++; k++;
- }
- }
-
- calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1);
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "permute_matrix");
- exit(1);
- }
+
+ return 0;
+
+ }catch(exception& e) {
+ m->errorOut(e, "MothurMetastats", "runMetastats");
+ exit(1);
+ }
}
/***********************************************************/
-int MothurMetastats::permute_array(vector<int>& array) {
+vector<double> MothurMetastats::permuted_pvalues(vector< vector<double> >& Imatrix, vector<double>& tstats, vector< vector<double> >& Fmatrix) {
try {
- static int seeded = 0;
-
- if (! seeded) {
- seeded = 1;
- srand(time(NULL));
- }
-
- for (int i = 0; i < array.size(); i++) {
- if (m->control_pressed) { return 0; }
-
- int selection = rand() % (array.size() - i);
- int tmp = array[i + selection];
- array[i + selection] = array[i];
- array[i] = tmp;
- }
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "permute_array");
- exit(1);
- }
+ //# matrix stores tstats for each taxa(row) for each permuted trial(column)
+ vector<double> ps; ps.resize(row, 0.0); //# to store the pvalues
+ vector< vector<double> > permuted_ttests; permuted_ttests.resize(numPermutations);
+ for (int i = 0; i < numPermutations; i++) { permuted_ttests[i].resize(row, 0.0); }
+
+ //# calculate null version of tstats using B permutations.
+ for (int i = 0; i < numPermutations; i++) {
+ permuted_ttests[i] = permute_and_calc_ts(Imatrix);
+ }
+
+ //# calculate each pvalue using the null ts
+ if ((secondGroupingStart) < 8 || (column-secondGroupingStart) < 8){
+ vector< vector<double> > cleanedpermuted_ttests; cleanedpermuted_ttests.resize(numPermutations); //# the array pooling just the frequently observed ts
+ //# then pool the t's together!
+ //# count how many high freq taxa there are
+ int hfc = 1;
+ for (int i = 0; i < row; i++) { // # for each taxa
+ double group1Total = 0.0; double group2Total = 0.0;
+ for(int j = 0; j < secondGroupingStart; j++) { group1Total += Fmatrix[i][j]; }
+ for(int j = secondGroupingStart; j < column; j++) { group2Total += Fmatrix[i][j]; }
+
+ if (group1Total >= secondGroupingStart || group2Total >= (column-secondGroupingStart)){
+ hfc++;
+ for (int j = 0; j < numPermutations; j++) { cleanedpermuted_ttests[j].push_back(permuted_ttests[j][i]); }
+ }
+ }
+
+ //#now for each taxa
+ for (int i = 0; i < row; i++) {
+ //number of cleanedpermuted_ttests greater than tstat[i]
+ int numGreater = 0;
+ for (int j = 0; j < numPermutations; j++) {
+ for (int k = 0; k < hfc; k++) {
+ if (cleanedpermuted_ttests[j][k] > abs(tstats[i])) { numGreater++; }
+ }
+ }
+
+ ps[i] = (1/(double)(numPermutations*hfc))*numGreater;
+ }
+ }else{
+ for (int i = 0; i < row; i++) {
+ //number of permuted_ttests[i] greater than tstat[i] //(sum(permuted_ttests[i,] > abs(tstats[i]))+1)
+ int numGreater = 1;
+ for (int j = 0; j < numPermutations; j++) { if (permuted_ttests[j][i] > abs(tstats[i])) { numGreater++; } }
+ ps[i] = (1/(double)(numPermutations+1))*numGreater;
+ }
+ }
+
+ return ps;
+
+ }catch(exception& e) {
+ m->errorOut(e, "MothurMetastats", "permuted_pvalues");
+ exit(1);
+ }
}
/***********************************************************/
-int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupingStart, vector<double>& Ts, vector<double>& Tinitial, vector<double>& counter) {
+vector<double> MothurMetastats::permute_and_calc_ts(vector< vector<double> >& Imatrix) {
try {
- int a = row * 4;
-
- vector< vector<double> > C1; C1.resize(row);
- for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
- vector< vector<double> > C2; C2.resize(row);
- for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
- vector<double> storage; storage.resize(a, 0.0);
- vector<double> tool; tool.resize(a, 0.0);
- double xbardiff = 0.0; double denom = 0.0;
-
- meanvar(Pmatrix, secondGroupingStart, storage);
-
- for(int i = 0;i <= (a-1); i++) {
- if (m->control_pressed) { return 0; }
- tool[i] = storage[i];
- }
-
- for (int i = 0; i < row; i++){
- if (m->control_pressed) { return 0; }
- C1[i][0]=tool[i];
- C1[i][1]=tool[i+row+row];
- C1[i][2]=C1[i][1]/(secondGroupingStart);
-
- C2[i][0]=tool[i+row];
- C2[i][1]=tool[i+row+row+row]; // var group 2
- C2[i][2]=C2[i][1]/(column-secondGroupingStart);
- }
-
- for (int i = 0; i < row; i++){
- if (m->control_pressed) { return 0; }
- xbardiff = C1[i][0]-C2[i][0];
- denom = sqrt(C1[i][2]+C2[i][2]);
- Ts[i]=fabs(xbardiff/denom);
- if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
- counter[i]++;
- }
- }
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "calc_twosample_ts");
- exit(1);
- }
+ vector< vector<double> > permutedMatrix = Imatrix;
+
+ //randomize columns, ie group abundances.
+ for (int i = 0; i < permutedMatrix.size(); i++) { random_shuffle(permutedMatrix[i].begin(), permutedMatrix[i].end()); }
+
+ //calc ts
+ vector< vector<double> > C1; C1.resize(row);
+ for (int i = 0; i < row; i++) { C1[i].resize(3, 0.0); } // statistic profiles for class1 and class 2
+ vector< vector<double> > C2; C2.resize(row); // mean[1], variance[2], standard error[3]
+ for (int i = 0; i < row; i++) { C2[i].resize(3, 0.0); }
+ vector<double> Ts; Ts.resize(row, 0.0); // a place to store the true t-statistics
+
+ //#*************************************
+ //# generate statistics mean, var, stderr
+ //#*************************************
+ for(int i = 0; i < row; i++){ // for each taxa
+ //# find the mean of each group
+ double g1Total = 0.0; double g2Total = 0.0;
+ for (int j = 0; j < secondGroupingStart; j++) { g1Total += permutedMatrix[i][j]; }
+ C1[i][0] = g1Total/(double)(secondGroupingStart);
+ for (int j = secondGroupingStart; j < column; j++) { g2Total += permutedMatrix[i][j]; }
+ C2[i][0] = g2Total/(double)(column-secondGroupingStart);
+
+ //# find the variance of each group
+ double g1Var = 0.0; double g2Var = 0.0;
+ for (int j = 0; j < secondGroupingStart; j++) { g1Var += pow((permutedMatrix[i][j]-C1[i][0]), 2); }
+ C1[i][1] = g1Var/(double)(secondGroupingStart-1);
+ for (int j = secondGroupingStart; j < column; j++) { g2Var += pow((permutedMatrix[i][j]-C2[i][0]), 2); }
+ C2[i][1] = g2Var/(double)(column-secondGroupingStart-1);
+
+ //# find the std error of each group -std err^2 (will change to std err at end)
+ C1[i][2] = C1[i][1]/(double)(secondGroupingStart);
+ C2[i][2] = C2[i][1]/(double)(column-secondGroupingStart);
+ }
+
+ //#*************************************
+ //# two sample t-statistics
+ //#*************************************
+ for(int i = 0; i < row; i++){ // # for each taxa
+ double xbar_diff = C1[i][0] - C2[i][0];
+ double denom = sqrt(C1[i][2] + C2[i][2]);
+ Ts[i] = abs(xbar_diff/denom); // calculate two sample t-statistic
+ }
+
+ return Ts;
+
+
+ }catch(exception& e) {
+ m->errorOut(e, "MothurMetastats", "permuted_ttests");
+ exit(1);
+ }
}
/***********************************************************/
vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
try {
- vector<double> qvalues;
- int numRows = pValues.size();
+ /* cout << "x <- c(" << pValues[0];
+ for (int l = 1; l < pValues.size(); l++){
+ cout << ", " << pValues[l];
+ }
+ cout << ")\n";*/
+
+ int numRows = pValues.size();
+ vector<double> qvalues(numRows, 0.0);
+
//fill lambdas with 0.00, 0.01, 0.02... 0.95
vector<double> lambdas(96, 0);
for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; }
int count = 0;
for (int i = 0; i < numRows; i++){ // for each p-value in order
if (pValues[i] > lambdas[l]){ count++; }
- pi0_hat[l] = count/(double)(numRows*(1-lambdas[l]));
}
+ pi0_hat[l] = count/(double)(numRows*(1-lambdas[l]));
}
- //vector<double> f_spline = smoothSpline(lambdas, pi0_hat, 3);
- //double pi0 = f_spline[(f_spline.size()-1)]; // this is the essential pi0_hat value
-
- //cubic Spline
- double pi0, notsure; //this is the essential pi0_hat value
- int notSure;
- vector<double> resultsSpline(lambdas.size(), 0.0);
- spline(pi0_hat, lambdas, notSure, notSure, resultsSpline);
- //some sort of loop to get best value??
- splint(pi0_hat, lambdas, notsure, pi0, resultsSpline);
+ double pi0 = smoothSpline(lambdas, pi0_hat, 3);
//order p-values
vector<double> ordered_qs = qvalues;
for (int i = 1; i < ordered_ps.size(); i++) { ordered_ps[i] = ordered_ps[i-1] + 1; }
vector<double> tempPvalues = pValues;
OrderPValues(0, numRows-1, tempPvalues, ordered_ps);
-
- ordered_qs[numRows-1] <- min((pValues[ordered_ps[numRows-1]]*pi0), 1.0);
+
+ ordered_qs[numRows-1] = min((pValues[ordered_ps[numRows-1]]*pi0), 1.0);
for (int i = (numRows-2); i >= 0; i--){
double p = pValues[ordered_ps[i]];
- double temp = p*numRows*pi0/(double)i;
-
+ double temp = p*numRows*pi0/(double)(i+1);
+
ordered_qs[i] = min(temp, ordered_qs[i+1]);
- ordered_qs[i] = min(ordered_qs[i], 1.0);
}
//re-distribute calculated qvalues to appropriate rows
exit(1);
}
}
-/***********************************************************
-vector<double> MothurMetastats::smoothSpline(vector<double> x, vector<double> y, int df) {
+/***********************************************************/
+double MothurMetastats::smoothSpline(vector<double>& x, vector<double>& y, int df) {
try {
-
- cout << "lambdas" << endl;
- for (int l = 0; l < x.size(); l++){ cout << x[l] << '\t'; }
- cout << endl << "pi0_hat" << endl;
- for (int l = 0; l < y.size(); l++){ cout << y[l] << '\t'; }
- cout << endl;
-
- //double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; double maxit = 500;
+
+ double result = 0.0;
int n = x.size();
vector<double> w(n, 1);
-
- //x <- signif(x, 6L) - I think this rounds to 6 decimals places
- //ux <- unique(sort(x)) //x will be unique and sorted since we created it
- //ox <- match(x, ux) //since its unique and sort it will match
-
- vector<double> wbar(n, 0);
- vector<double> ybar(n, 0);
- vector<double> xbar(n, 0.0);
+ double* xb = new double[n];
+ double* yb = new double[n];
+ double* wb = new double[n];
double yssw = 0.0;
for (int i = 0; i < n; i++) {
- wbar[i] = w[i];
- ybar[i] = w[i]*y[i];
- yssw += (w[i] * y[i] * y[i]) - wbar[i] * (ybar[i] * ybar[i]);
- xbar[i] = (x[i] - x[0]) / (x[n-1] - x[0]);
+ wb[i] = w[i];
+ yb[i] = w[i]*y[i];
+ yssw += (w[i] * y[i] * y[i]) - wb[i] * (yb[i] * yb[i]);
+ xb[i] = (x[i] - x[0]) / (x[n-1] - x[0]);
}
- vector<double> knot = sknot1(xbar);
+ vector<double> knot = sknot1(xb, n);
int nk = knot.size() - 4;
-
- //double ispar = 0.0; double spar = 0.0; double icrit = 3.0; double dofoff = 3.0;
-
- return y;
+
+ double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; int maxit = 500;
+ int ispar = 0; int icrit = 3; double dofoff = 3.0;
+ double penalty = 1.0;
+ int ld4 = 4; int isetup = 0; int ldnk = 1; int ier; double spar = 1.0; double crit;
+
+ double* knotb = new double[knot.size()];
+ double* coef1 = new double[nk];
+ double* lev1 = new double[n];
+ double* sz1 = new double[n];
+ for (int i = 0; i < knot.size(); i++) { knotb[i] = knot[i]; }
+
+ Spline spline;
+ spline.sbart(&penalty, &dofoff, &xb[0], &yb[0], &wb[0], &yssw, &n, &knotb[0], &nk, &coef1[0], &sz1[0], &lev1[0], &crit,
+ &icrit, &spar, &ispar, &maxit, &low, &high, &tol, &eps, &isetup, &ld4, &ldnk, &ier);
+
+ result = coef1[nk-1];
+
+ //free memory
+ delete [] xb;
+ delete [] yb;
+ delete [] wb;
+ delete [] knotb;
+ delete [] coef1;
+ delete [] lev1;
+ delete [] sz1;
+
+ return result;
}catch(exception& e) {
m->errorOut(e, "MothurMetastats", "smoothSpline");
}
}
/***********************************************************/
-// This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
-int MothurMetastats::spline(vector<double>& x, vector<double>& y, int yp1, int ypn, vector<double>& y2) {
- try {
-
- cout << "lambdas" << endl;
- for (int l = 0; l < x.size(); l++){ cout << x[l] << '\t'; }
- cout << endl << "pi0_hat" << endl;
- for (int l = 0; l < y.size(); l++){ cout << y[l] << '\t'; }
- cout << endl;
-
- double p, qn, sig, un;
-
- int n = y2.size();
- vector<double> u(n-1, 0.0);
-
- if (yp1 > 0.99e30) { y2[0] = u[0] = 0.0; }
- else {
- y2[0] = -0.5;
- u[0] = (3.0 / (x[1] - x[0])) * ((y[1] - y[0]) / (x[1]-x[0]) - yp1);
- }
-
- for (int i = 1; i < n-1; i++) {
- sig = (x[i]-x[i-1])/(x[i+1]-x[i-1]);
- p = sig * y2[i-1] + 2.0;
- y2[i] = (sig - 1.0)/p;
- u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i] - y[i-1]) / (x[i]-x[i-1]);
- u[i] = (6.0*u[i]/(x[i+1]-x[i-1])-sig*u[i-1])/p;
- }
-
- if (ypn > 0.99e30) { qn=un=0.0; }
- else {
- qn=0.5;
- un=(3.0/(x[n-1]-x[n-2]))*(ypn-(y[n-1]-y[n-2])/(x[n-1]-x[n-2]));
- }
-
- y2[n-1]=(un-qn*u[n-2])/(qn*y2[n-2]+1.0);
-
- for (int k=n-2; k>=0; k--) {
- y2[k]=y2[k]*y2[k+1]+u[k];
- }
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "spline");
- exit(1);
- }
-}
-/***********************************************************/
-// This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
-int MothurMetastats::splint(vector<double>& xa, vector<double>& ya, double& x, double& y, vector<double>& y2a) {
- try {
- int k;
- double h,b,a;
-
- int n = xa.size();
-
- int klo=0;
- int khi=n-1;
- while (khi-klo > 1) {
-
- if (m->control_pressed) { break; }
-
- k = (khi+klo) >> 1;
- if (xa[k] > x) { khi=k; }
- else { klo=k; }
- }
-
- h=xa[khi]-xa[klo];
- if (h == 0.0) { m->mothurOut("[ERROR]: Bad xa input to splint routine."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
- a=(xa[khi]-x)/h;
- b=(x - xa[klo])/h;
- y=a*ya[klo]+b*ya[khi]+((a*a*a-a)*y2a[klo]+(b*b*b-b)*y2a[khi])*(h*h)/6.0;
-
- return 0;
-
- }catch(exception& e) {
- m->errorOut(e, "MothurMetastats", "splint");
- exit(1);
- }
-}
-/***********************************************************/
-vector<double> MothurMetastats::sknot1(vector<double> x) {
+vector<double> MothurMetastats::sknot1(double* x, int n) {
try {
vector<double> knots;
- int n = x.size();
int nk = nkn(n);
- cout << nk << endl;
//R equivalent - rep(x[1L], 3L)
knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]);
//generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk)
vector<int> indexes = getSequence(0, n-1, nk);
- for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); }
+ for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); }
//R equivalent - rep(x[n], 3L)
knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]);
-
+
return knots;
}catch(exception& e) {