X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=linearalgebra.cpp;h=cd2ca0397bb21493761c69f729893103fc314613;hp=effb1ad44e04abd9c1bdd8523ca3b05f4253de94;hb=615301e57c25e241356a9c2380648d117709458d;hpb=91a27e0483827c06c21c4fe89558923bbfe86573 diff --git a/linearalgebra.cpp b/linearalgebra.cpp index effb1ad..cd2ca03 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -8,6 +8,7 @@ */ #include "linearalgebra.h" +#include "wilcox.h" // This class references functions used from "Numerical Recipes in C++" // @@ -1228,7 +1229,241 @@ double LinearAlgebra::calcKendallSig(double n, double r){ exit(1); } } +/*********************************************************************************************************************************/ +double LinearAlgebra::calcKruskalWallis(vector& values, double& pValue){ + try { + double H; + set treatments; + + //rank values + sort(values.begin(), values.end(), compareSpearman); + vector ties; + int rankTotal = 0; + vector TIES; + for (int j = 0; j < values.size(); j++) { + treatments.insert(values[j].name); + rankTotal += (j+1); + ties.push_back(&(values[j])); + + if (j != values.size()-1) { // you are not the last so you can look ahead + if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + + + // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1) + map sums; + map counts; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; } + + for (int j = 0; j < values.size(); j++) { + sums[values[j].name] += values[j].score; + counts[values[j].name]+= 1.0; + } + + double middleTerm = 0.0; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { + middleTerm += ((sums[*it]*sums[*it])/counts[*it]); + } + + double firstTerm = 12 / (double) (values.size()*(values.size()+1)); + double lastTerm = 3 * (values.size()+1); + + H = firstTerm * middleTerm - lastTerm; + + //adjust for ties + if (TIES.size() != 0) { + double sum = 0.0; + for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } + double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size())); + H /= result; + } + + //Numerical Recipes pg221 + pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0)); + + return H; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcKruskalWallis"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//thanks http://www.johndcook.com/cpp_phi.html +double LinearAlgebra::pnorm(double x){ + try { + // constants + double a1 = 0.254829592; + double a2 = -0.284496736; + double a3 = 1.421413741; + double a4 = -1.453152027; + double a5 = 1.061405429; + double p = 0.3275911; + + // Save the sign of x + int sign = 1; + if (x < 0) + sign = -1; + x = fabs(x)/sqrt(2.0); + + // A&S formula 7.1.26 + double t = 1.0/(1.0 + p*x); + double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x); + + return 0.5*(1.0 + sign*y); + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "pnorm"); + exit(1); + } +} +/*********************************************************************************************************************************/ +double LinearAlgebra::calcWilcoxon(vector& x, vector& y, double& sig){ + try { + double W = 0.0; + sig = 0.0; + + vector ranks; + for (int i = 0; i < x.size(); i++) { + if (m->control_pressed) { return W; } + spearmanRank member("x", x[i]); + ranks.push_back(member); + } + + for (int i = 0; i < y.size(); i++) { + if (m->control_pressed) { return W; } + spearmanRank member("y", y[i]); + ranks.push_back(member); + } + + //sort values + sort(ranks.begin(), ranks.end(), compareSpearman); + + //convert scores to ranks of x + vector ties; + int rankTotal = 0; + vector TIES; + for (int j = 0; j < ranks.size(); j++) { + if (m->control_pressed) { return W; } + rankTotal += (j+1); + ties.push_back(&(ranks[j])); + + if (j != ranks.size()-1) { // you are not the last so you can look ahead + if (ranks[j].score != ranks[j+1].score) { // you are done with ties, rank them and continue + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + + //from R wilcox.test function + //STATISTIC <- sum(r[seq_along(x)]) - n.x * (n.x + 1)/2 + double sumRanks = 0.0; + for (int i = 0; i < ranks.size(); i++) { + if (m->control_pressed) { return W; } + if (ranks[i].name == "x") { sumRanks += ranks[i].score; } + } + + W = sumRanks - x.size() * ((double)(x.size() + 1)) / 2.0; + + //exact <- (n.x < 50) && (n.y < 50) + bool findExact = false; + if ((x.size() < 50) && (y.size() < 50)) { findExact = true; } + + + if (findExact && (TIES.size() == 0)) { //find exact and no ties + //PVAL <- switch(alternative, two.sided = { + //p <- if (STATISTIC > (n.x * n.y/2)) + PWilcox wilcox; + double pval = 0.0; + if (W > ((double)x.size()*y.size()/2.0)) { + //pwilcox(STATISTIC-1, n.x, n.y, lower.tail = FALSE) + pval = wilcox.pwilcox(W-1, x.size(), y.size(), false); + }else { + //pwilcox(STATISTIC,n.x, n.y) + pval = wilcox.pwilcox(W, x.size(), y.size(), true); + } + sig = 2.0 * pval; + if (1.0 < sig) { sig = 1.0; } + }else { + //z <- STATISTIC - n.x * n.y/2 + double z = W - (double)(x.size() * y.size()/2.0); + //NTIES <- table(r) + double sum = 0.0; + for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } + + //SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) - + //sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y - + //1)))) + double sigma = 0.0; + double firstTerm = (double)(x.size() * y.size()/12.0); + double secondTerm = (double)(x.size() + y.size() + 1) - sum / (double)((x.size() + y.size()) * (x.size() + y.size() - 1)); + sigma = sqrt(firstTerm * secondTerm); + + //CORRECTION <- switch(alternative, two.sided = sign(z) * 0.5, greater = 0.5, less = -0.5) + double CORRECTION = 0.0; + if (z < 0) { CORRECTION = -1.0; } + else if (z > 0) { CORRECTION = 1.0; } + CORRECTION *= 0.5; + + z = (z - CORRECTION)/sigma; + + //PVAL <- switch(alternative, two.sided = 2 * min(pnorm(z), pnorm(z, lower.tail = FALSE))) + sig = pnorm(z); + if ((1.0-sig) < sig) { sig = 1.0 - sig; } + sig *= 2; + } + + return W; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcWilcoxon"); + exit(1); + } +} + +/*********************************************************************************************************************************/ +double LinearAlgebra::choose(double n, double k){ + try { + n = floor(n + 0.5); + k = floor(k + 0.5); + + double lchoose = gammln(n + 1.0) - gammln(k + 1.0) - gammln(n - k + 1.0); + + return (floor(exp(lchoose) + 0.5)); + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "choose"); + exit(1); + } +} /*********************************************************************************************************************************/ double LinearAlgebra::calcSpearman(vector& x, vector& y, double& sig){ try { @@ -1479,29 +1714,303 @@ double LinearAlgebra::calcPearsonSig(double n, double r){ /*********************************************************************************************************************************/ vector > LinearAlgebra::getObservedEuclideanDistance(vector >& relAbundData){ + try { - int numSamples = relAbundData.size(); - int numOTUs = relAbundData[0].size(); - - vector > dMatrix(numSamples); - for(int i=0;i > dMatrix(numSamples); + for(int i=0;icontrol_pressed) { return dMatrix; } + + double d = 0; + for(int k=0;kerrorOut(e, "LinearAlgebra", "getObservedEuclideanDistance"); + exit(1); + } +} + +/*********************************************************************************************************************************/ +vector LinearAlgebra::solveEquations(vector > A, vector b){ + try { + int length = (int)b.size(); + vector x(length, 0); + vector index(length); + for(int i=0;icontrol_pressed) { return b; } + lubksb(A, index, b); + + return b; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "solveEquations"); + exit(1); + } +} +/*********************************************************************************************************************************/ +vector LinearAlgebra::solveEquations(vector > A, vector b){ + try { + int length = (int)b.size(); + vector x(length, 0); + vector index(length); + for(int i=0;icontrol_pressed) { return b; } + lubksb(A, index, b); + + return b; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "solveEquations"); + exit(1); } - return dMatrix; - } /*********************************************************************************************************************************/ + +void LinearAlgebra::ludcmp(vector >& A, vector& index, double& d){ + try { + double tiny = 1e-20; + + int n = (int)A.size(); + vector vv(n, 0.0); + double temp; + int imax; + + d = 1.0; + + for(int i=0;i big ) big=temp; } + if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); } + vv[i] = 1.0/big; + } + + for(int j=0;jcontrol_pressed) { break; } + for(int i=0;i= big){ + big = dum; + imax = i; + } + } + if(j != imax){ + for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ + try { + double total; + int n = (int)A.size(); + int ii = 0; + + for(int i=0;icontrol_pressed) { break; } + int ip = index[i]; + total = b[ip]; + b[ip] = b[i]; + + if (ii != 0) { + for(int j=ii-1;j=0;i--){ + total = b[i]; + for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); + exit(1); + } +} +/*********************************************************************************************************************************/ + +void LinearAlgebra::ludcmp(vector >& A, vector& index, float& d){ + try { + double tiny = 1e-20; + + int n = (int)A.size(); + vector vv(n, 0.0); + double temp; + int imax; + + d = 1.0; + + for(int i=0;i big ) big=temp; } + if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); } + vv[i] = 1.0/big; + } + + for(int j=0;jcontrol_pressed) { break; } + for(int i=0;i= big){ + big = dum; + imax = i; + } + } + if(j != imax){ + for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ + try { + float total; + int n = (int)A.size(); + int ii = 0; + + for(int i=0;icontrol_pressed) { break; } + int ip = index[i]; + total = b[ip]; + b[ip] = b[i]; + + if (ii != 0) { + for(int j=ii-1;j=0;i--){ + total = b[i]; + for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +vector > LinearAlgebra::getInverse(vector > matrix){ + try { + int n = (int)matrix.size(); + + vector > inverse(n); + for(int i=0;i column(n, 0.0000); + vector index(n, 0); + double dummy; + + ludcmp(matrix, index, dummy); + + for(int j=0;jcontrol_pressed) { break; } + + column.assign(n, 0); + + column[j] = 1.0000; + + lubksb(matrix, index, column); + + for(int i=0;ierrorOut(e, "LinearAlgebra", "getInverse"); + exit(1); + } +}