X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=linearalgebra.cpp;h=bacf2c1c07bb420430d07416b6ca7354c650a79b;hp=effb1ad44e04abd9c1bdd8523ca3b05f4253de94;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=91a27e0483827c06c21c4fe89558923bbfe86573 diff --git a/linearalgebra.cpp b/linearalgebra.cpp index effb1ad..bacf2c1 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -8,6 +8,9 @@ */ #include "linearalgebra.h" +#include "wilcox.h" + +#define PI 3.1415926535897932384626433832795 // This class references functions used from "Numerical Recipes in C++" // @@ -111,7 +114,7 @@ double LinearAlgebra::gammp(const double a, const double x) { } /*********************************************************************************************************************************/ //Numerical Recipes pg. 223 -double LinearAlgebra::gammq(const double a, const double x) { +/*double LinearAlgebra::gammq(const double a, const double x) { try { double gamser,gammcf,gln; @@ -127,11 +130,11 @@ double LinearAlgebra::gammq(const double a, const double x) { return 0; } catch(exception& e) { - m->errorOut(e, "LinearAlgebra", "gammp"); + m->errorOut(e, "LinearAlgebra", "gammq"); exit(1); } } -/*********************************************************************************************************************************/ +*********************************************************************************************************************************/ //Numerical Recipes pg. 224 double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){ try { @@ -158,7 +161,7 @@ double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double h *= del; if (fabs(del-1.0) <= EPS) break; } - if (i > ITMAX) { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; } + if (i > ITMAX) { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->control_pressed = true; } gammcf=exp(-x+a*log(x)-gln)*h; return 0.0; @@ -252,7 +255,7 @@ double LinearAlgebra::betacf(const double a, const double b, const double x) { } } /*********************************************************************************************************************************/ - +//[3][4] * [4][5] - columns in first must match rows in second, returns matrix[3][5] vector > LinearAlgebra::matrix_mult(vector > first, vector > second){ try { vector > product; @@ -286,7 +289,23 @@ vector > LinearAlgebra::matrix_mult(vector > first } } +/*********************************************************************************************************************************/ +vector > LinearAlgebra::transpose(vector >matrix){ + try { + vector > trans; trans.resize(matrix[0].size()); + for (int i = 0; i < trans.size(); i++) { + for (int j = 0; j < matrix.size(); j++) { trans[i].push_back(matrix[j][i]); } + } + + return trans; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "transpose"); + exit(1); + } + +} /*********************************************************************************************************************************/ void LinearAlgebra::recenter(double offset, vector > D, vector >& G){ @@ -1228,7 +1247,259 @@ double LinearAlgebra::calcKendallSig(double n, double r){ exit(1); } } +/*********************************************************************************************************************************/ +double LinearAlgebra::calcKruskalWallis(vector& values, double& pValue){ + try { + double H; + set treatments; + + 
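+        //each spearmanRank in values pairs a score with its treatment name;
+        //the block below sorts by score and replaces each score with its rank,
+        //giving tied scores the average of the ranks they span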
//rank values + sort(values.begin(), values.end(), compareSpearman); + vector ties; + int rankTotal = 0; + vector TIES; + for (int j = 0; j < values.size(); j++) { + treatments.insert(values[j].name); + rankTotal += (j+1); + ties.push_back(&(values[j])); + + if (j != values.size()-1) { // you are not the last so you can look ahead + if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + + + // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1) + map sums; + map counts; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; } + + for (int j = 0; j < values.size(); j++) { + sums[values[j].name] += values[j].score; + counts[values[j].name]+= 1.0; + } + + double middleTerm = 0.0; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { + middleTerm += ((sums[*it]*sums[*it])/counts[*it]); + } + + double firstTerm = 12 / (double) (values.size()*(values.size()+1)); + double lastTerm = 3 * (values.size()+1); + + H = firstTerm * middleTerm - lastTerm; + + //adjust for ties + if (TIES.size() != 0) { + double sum = 0.0; + for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } + double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size())); + H /= result; + } + + if (isnan(H) || isinf(H)) { H = 0; } + + //Numerical Recipes pg221 + pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0)); + + return H; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcKruskalWallis"); + exit(1); + } +} +/*********************************************************************************************************************************/ +double LinearAlgebra::normalvariate(double mean, double standardDeviation) { + try { + double u1 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0); + double u2 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0); + //double r = sqrt( -2.0*log(u1) ); + //double theta = 2.0*PI*u2; + //cout << cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)) << endl; + return cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)); + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "normalvariate"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//thanks http://www.johndcook.com/cpp_phi.html +double LinearAlgebra::pnorm(double x){ + try { + // constants + double a1 = 0.254829592; + double a2 = -0.284496736; + double a3 = 1.421413741; + double a4 = -1.453152027; + double a5 = 1.061405429; + double p = 0.3275911; + + // Save the sign of x + int sign = 1; + if (x < 0) + sign = -1; + x = fabs(x)/sqrt(2.0); + + // A&S formula 7.1.26 + double t = 1.0/(1.0 + p*x); + double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x); + + return 0.5*(1.0 + sign*y); + + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "pnorm"); + exit(1); + } +} + 
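A minimal standalone sketch (the two-treatment data and names are illustrative, not from mothur) of the H = 12/(N*(N+1)) * sum(Ti^2/ni) - 3*(N+1) formula used by calcKruskalWallis above. With two treatments the chi-square tail 1 - gammp(1/2, H/2) reduces to erfc(sqrt(H/2)), so the value can be checked without the gammp routine.

#include <cmath>
#include <cstdio>

int main() {
    // two treatments, no ties: ranks {1,2} for "a" and {3,4} for "b"
    double Ta = 1.0 + 2.0, Tb = 3.0 + 4.0;       // rank totals per treatment
    double na = 2.0, nb = 2.0, N = na + nb;
    double H = 12.0 / (N * (N + 1.0)) * (Ta * Ta / na + Tb * Tb / nb) - 3.0 * (N + 1.0);
    double p = std::erfc(std::sqrt(H / 2.0));    // chi-square upper tail, 1 df
    printf("H = %g, p = %g\n", H, p);            // H = 2.4, p ~ 0.121
    return 0;
}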
+/*********************************************************************************************************************************/ +double LinearAlgebra::calcWilcoxon(vector& x, vector& y, double& sig){ + try { + double W = 0.0; + sig = 0.0; + + vector ranks; + for (int i = 0; i < x.size(); i++) { + if (m->control_pressed) { return W; } + spearmanRank member("x", x[i]); + ranks.push_back(member); + } + + for (int i = 0; i < y.size(); i++) { + if (m->control_pressed) { return W; } + spearmanRank member("y", y[i]); + ranks.push_back(member); + } + + //sort values + sort(ranks.begin(), ranks.end(), compareSpearman); + + //convert scores to ranks of x + vector ties; + int rankTotal = 0; + vector TIES; + for (int j = 0; j < ranks.size(); j++) { + if (m->control_pressed) { return W; } + rankTotal += (j+1); + ties.push_back(&(ranks[j])); + + if (j != ranks.size()-1) { // you are not the last so you can look ahead + if (ranks[j].score != ranks[j+1].score) { // you are done with ties, rank them and continue + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + + //from R wilcox.test function + //STATISTIC <- sum(r[seq_along(x)]) - n.x * (n.x + 1)/2 + double sumRanks = 0.0; + for (int i = 0; i < ranks.size(); i++) { + if (m->control_pressed) { return W; } + if (ranks[i].name == "x") { sumRanks += ranks[i].score; } + } + + W = sumRanks - x.size() * ((double)(x.size() + 1)) / 2.0; + + //exact <- (n.x < 50) && (n.y < 50) + bool findExact = false; + if ((x.size() < 50) && (y.size() < 50)) { findExact = true; } + + + if (findExact && (TIES.size() == 0)) { //find exact and no ties + //PVAL <- switch(alternative, two.sided = { + //p <- if (STATISTIC > (n.x * n.y/2)) + PWilcox wilcox; + double pval = 0.0; + if (W > ((double)x.size()*y.size()/2.0)) { + //pwilcox(STATISTIC-1, n.x, n.y, lower.tail = FALSE) + pval = wilcox.pwilcox(W-1, x.size(), y.size(), false); + }else { + //pwilcox(STATISTIC,n.x, n.y) + pval = wilcox.pwilcox(W, x.size(), y.size(), true); + } + sig = 2.0 * pval; + if (1.0 < sig) { sig = 1.0; } + }else { + //z <- STATISTIC - n.x * n.y/2 + double z = W - (double)(x.size() * y.size()/2.0); + //NTIES <- table(r) + double sum = 0.0; + for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } + + //SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) - + //sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y - + //1)))) + double sigma = 0.0; + double firstTerm = (double)(x.size() * y.size()/12.0); + double secondTerm = (double)(x.size() + y.size() + 1) - sum / (double)((x.size() + y.size()) * (x.size() + y.size() - 1)); + sigma = sqrt(firstTerm * secondTerm); + + //CORRECTION <- switch(alternative, two.sided = sign(z) * 0.5, greater = 0.5, less = -0.5) + double CORRECTION = 0.0; + if (z < 0) { CORRECTION = -1.0; } + else if (z > 0) { CORRECTION = 1.0; } + CORRECTION *= 0.5; + + z = (z - CORRECTION)/sigma; + + //PVAL <- switch(alternative, two.sided = 2 * min(pnorm(z), pnorm(z, lower.tail = FALSE))) + sig = pnorm(z); + if ((1.0-sig) < sig) { sig = 1.0 - sig; } + sig *= 2; + } + + return W; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcWilcoxon"); + exit(1); + } +} 
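A standalone sketch of the large-sample branch above, following the same R wilcox.test formulas the function transcribes. The helper name wilcoxNormalApproxP is illustrative; W is the rank-sum statistic already reduced by n.x*(n.x+1)/2, tieCounts holds the size of each run of tied ranks, and erfc supplies the exact standard normal CDF in place of the pnorm approximation.

#include <algorithm>
#include <cmath>
#include <vector>

double wilcoxNormalApproxP(double W, double nx, double ny, const std::vector<double>& tieCounts) {
    double z = W - nx * ny / 2.0;                        // center the statistic
    double tieSum = 0.0;                                 // sum(NTIES^3 - NTIES)
    for (size_t j = 0; j < tieCounts.size(); j++) { tieSum += tieCounts[j]*tieCounts[j]*tieCounts[j] - tieCounts[j]; }
    double sigma = sqrt((nx * ny / 12.0) * ((nx + ny + 1.0) - tieSum / ((nx + ny) * (nx + ny - 1.0))));
    double correction = 0.5 * ((z > 0) - (z < 0));       // two-sided continuity correction
    z = (z - correction) / sigma;
    double lower = 0.5 * std::erfc(-z / std::sqrt(2.0)); // P(Z <= z)
    return 2.0 * std::min(lower, 1.0 - lower);           // two-sided p-value
}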
+/*********************************************************************************************************************************/ +double LinearAlgebra::choose(double n, double k){ + try { + n = floor(n + 0.5); + k = floor(k + 0.5); + + double lchoose = gammln(n + 1.0) - gammln(k + 1.0) - gammln(n - k + 1.0); + + return (floor(exp(lchoose) + 0.5)); + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "choose"); + exit(1); + } +} /*********************************************************************************************************************************/ double LinearAlgebra::calcSpearman(vector& x, vector& y, double& sig){ try { @@ -1479,29 +1750,865 @@ double LinearAlgebra::calcPearsonSig(double n, double r){ /*********************************************************************************************************************************/ vector > LinearAlgebra::getObservedEuclideanDistance(vector >& relAbundData){ + try { - int numSamples = relAbundData.size(); - int numOTUs = relAbundData[0].size(); - - vector > dMatrix(numSamples); - for(int i=0;i > dMatrix(numSamples); + for(int i=0;icontrol_pressed) { return dMatrix; } + + double d = 0; + for(int k=0;kerrorOut(e, "LinearAlgebra", "getObservedEuclideanDistance"); + exit(1); + } +} + +/*********************************************************************************************************************************/ +vector LinearAlgebra::solveEquations(vector > A, vector b){ + try { + int length = (int)b.size(); + vector x(length, 0); + vector index(length); + for(int i=0;icontrol_pressed) { return b; } + lubksb(A, index, b); + + return b; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "solveEquations"); + exit(1); + } +} +/*********************************************************************************************************************************/ +vector LinearAlgebra::solveEquations(vector > A, vector b){ + try { + int length = (int)b.size(); + vector x(length, 0); + vector index(length); + for(int i=0;icontrol_pressed) { return b; } + lubksb(A, index, b); + + return b; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "solveEquations"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::ludcmp(vector >& A, vector& index, double& d){ + try { + double tiny = 1e-20; + + int n = (int)A.size(); + vector vv(n, 0.0); + double temp; + int imax; + + d = 1.0; + + for(int i=0;i big ) big=temp; } + if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); } + vv[i] = 1.0/big; + } + + for(int j=0;jcontrol_pressed) { break; } + for(int i=0;i= big){ + big = dum; + imax = i; + } + } + if(j != imax){ + for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ + try { + double total; + int n = (int)A.size(); + int ii = 0; + + for(int i=0;icontrol_pressed) { break; } + int ip = index[i]; + total = b[ip]; + b[ip] = b[i]; + + if (ii != 0) { + for(int j=ii-1;j=0;i--){ + total = b[i]; + for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); + exit(1); + } +} +/*********************************************************************************************************************************/ + +void LinearAlgebra::ludcmp(vector >& A, vector& index, float& d){ + try 
{ + double tiny = 1e-20; + + int n = (int)A.size(); + vector vv(n, 0.0); + double temp; + int imax; + + d = 1.0; + + for(int i=0;i big ) big=temp; } + if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); } + vv[i] = 1.0/big; + } + + for(int j=0;jcontrol_pressed) { break; } + for(int i=0;i= big){ + big = dum; + imax = i; + } + } + if(j != imax){ + for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ + try { + float total; + int n = (int)A.size(); + int ii = 0; + + for(int i=0;icontrol_pressed) { break; } + int ip = index[i]; + total = b[ip]; + b[ip] = b[i]; + + if (ii != 0) { + for(int j=ii-1;j=0;i--){ + total = b[i]; + for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); + exit(1); } - return dMatrix; - } /*********************************************************************************************************************************/ + +vector > LinearAlgebra::getInverse(vector > matrix){ + try { + int n = (int)matrix.size(); + + vector > inverse(n); + for(int i=0;i column(n, 0.0000); + vector index(n, 0); + double dummy; + + ludcmp(matrix, index, dummy); + + for(int j=0;jcontrol_pressed) { break; } + + column.assign(n, 0); + + column[j] = 1.0000; + + lubksb(matrix, index, column); + + for(int i=0;ierrorOut(e, "LinearAlgebra", "getInverse"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//modelled R lda function - MASS:::lda.default +vector< vector > LinearAlgebra::lda(vector< vector >& a, vector groups, vector< vector >& means, bool& ignore) { + try { + + set uniqueGroups; + for (int i = 0; i < groups.size(); i++) { uniqueGroups.insert(groups[i]); } + int numGroups = uniqueGroups.size(); + + map quickIndex; //className to index. hoping to save counts, proportions and means in vectors to save time. This map will allow us to know index 0 in counts refers to group1. + int count = 0; + for (set::iterator it = uniqueGroups.begin(); it != uniqueGroups.end(); it++) { quickIndex[*it] = count; count++; } + + int numSampled = groups.size(); //number of sampled groups + int numOtus = a.size(); //number of flagged bins + + //counts <- as.vector(table(g)) //number of samples from each class in random sampling + vector counts; counts.resize(numGroups, 0); + for (int i = 0; i < groups.size(); i++) { + counts[quickIndex[groups[i]]]++; + } + + vector proportions; proportions.resize(numGroups, 0.0); + for (int i = 0; i < numGroups; i++) { proportions[i] = counts[i] / (double) numSampled; } + + means.clear(); //means[0] -> means[0][0] average for [group0][OTU0]. + means.resize(numGroups); for (int i = 0; i < means.size(); i++) { means[i].resize(numOtus, 0.0); } + for (int j = 0; j < numSampled; j++) { //total for each class for each OTU + for (int i = 0; i < numOtus; i++) { means[quickIndex[groups[j]]][i] += a[i][j]; } + } + //average for each class for each OTU + for (int j = 0; j < numGroups; j++) { for (int i = 0; i < numOtus; i++) { means[j][i] /= counts[j]; } } + + //randCov <- x - group.means[g, ] + vector< vector > randCov; //randCov[0][0] -> (random sample value0 for OTU0 - average for samples group in OTU0). example OTU0, random sample 0.01 from class early. average of class early for OTU0 is 0.005. 
randCov[0][0] = (0.01-0.005) + for (int i = 0; i < numOtus; i++) { //for each flagged OTU + vector tempRand; + for (int j = 0; j < numSampled; j++) { tempRand.push_back(a[i][j] - means[quickIndex[groups[j]]][i]); } + randCov.push_back(tempRand); + } + + //find variance and std for each OTU + //f1 <- sqrt(diag(var(x - group.means[g, ]))) + vector stdF1; + vector ave; + for (int i = 0; i < numOtus; i++) { + stdF1.push_back(0.0); + ave.push_back(m->getAverage(randCov[i])); + } + + for (int i = 0; i < numOtus; i++) { + for (int j = 0; j < numSampled; j++) { stdF1[i] += ((randCov[i][j] - ave[i]) * (randCov[i][j] - ave[i])); } + } + + //fac <- 1/(n - ng) + double fac = 1 / (double) (numSampled-numGroups); + + for (int i = 0; i < stdF1.size(); i++) { + stdF1[i] /= (double) (numSampled-1); + stdF1[i] = sqrt(stdF1[i]); + } + + vector< vector > scaling; //[numOTUS][numOTUS] + for (int i = 0; i < numOtus; i++) { + vector temp; + for (int j = 0; j < numOtus; j++) { + if (i == j) { temp.push_back(1.0/stdF1[i]); } + else { temp.push_back(0.0); } + + } + scaling.push_back(temp); + } + /* + cout << "scaling = " << endl; + for (int i = 0; i < scaling.size(); i++) { + for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; } + cout << endl; + }*/ + + //X <- sqrt(fac) * ((x - group.means[g, ]) %*% scaling) + vector< vector > X = randCov; //[numOTUS][numSampled] + //((x - group.means[g, ]) %*% scaling) + //matrix multiplication of randCov and scaling + LinearAlgebra linear; + X = linear.matrix_mult(scaling, randCov); //[numOTUS][numOTUS] * [numOTUS][numSampled] = [numOTUS][numSampled] + fac = sqrt(fac); + + for (int i = 0; i < X.size(); i++) { + for (int j = 0; j < X[i].size(); j++) { X[i][j] *= fac; } + } + + vector d; + vector< vector > v; + vector< vector > Xcopy; //X = [numOTUS][numSampled] + bool transpose = false; //svd requires rows < columns, so if they are not then I need to transpose and look for the results in v. + if (X.size() < X[0].size()) { Xcopy = linear.transpose(X); transpose=true; } + else { Xcopy = X; } + linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below, because R's version is [numSampled][numOTUS] + + /*cout << "Xcopy = " << endl; + for (int i = 0; i < Xcopy.size(); i++) { + for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; } + cout << endl; + } + cout << "v = " << endl; + for (int i = 0; i < v.size(); i++) { + for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; } + cout << endl; + } + */ + + int rank = 0; + set goodColumns; + //cout << "d = " << endl; + for (int i = 0; i < d.size(); i++) { if (d[i] > 0.0000000001) { rank++; goodColumns.insert(i); } } //cout << d[i] << endl; + + if (rank == 0) { + ignore=true; //m->mothurOut("[ERROR]: rank = 0: variables are numerically const\n"); m->control_pressed = true; + return scaling; } + + //scaling <- scaling %*% X.s$v[, 1L:rank] %*% diag(1/X.s$d[1L:rank], , rank) + //X.s$v[, 1L:rank] = columns in Xcopy that correspond to "good" d values + //diag(1/X.s$d[1L:rank], , rank) = matrix size rank * rank where the diagonal is 1/"good" dvalues + /*example: + d + [1] 3.721545e+00 3.034607e+00 2.296649e+00 7.986927e-16 6.922408e-16 + [6] 5.471102e-16 + + $v + [,1] [,2] [,3] [,4] [,5] [,6] + [1,] 0.31122175 0.10944725 0.20183340 -0.30136820 0.60786235 -0.13537095 + [2,] -0.29563726 -0.20568893 0.11233366 -0.05073289 0.48234270 0.21965978 + ... + + [1] "X.s$v[, 1L:rank]" + [,1] [,2] [,3] + [1,] 0.31122175 0.10944725 0.20183340 + [2,] -0.29563726 -0.20568893 0.11233366 + ... 
+ [1] "1/X.s$d[1L:rank]" + [1] 0.2687056 0.3295320 0.4354170 + + [1] "diag(1/X.s$d[1L:rank], , rank)" + [,1] [,2] [,3] + [1,] 0.2687056 0.000000 0.000000 + [2,] 0.0000000 0.329532 0.000000 + [3,] 0.0000000 0.000000 0.435417 + */ + if (transpose) { + Xcopy = linear.transpose(v); + /* + cout << "Xcopy = " << endl; + for (int i = 0; i < Xcopy.size(); i++) { + for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; } + cout << endl; + }*/ + } + v.clear(); //store "good" columns - X.s$v[, 1L:rank] + v.resize(Xcopy.size()); //[numOTUS]["good" columns] + for (set::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { + for (int i = 0; i < Xcopy.size(); i++) { + v[i].push_back(Xcopy[i][*it]); + } + } + + vector< vector > diagRanks; diagRanks.resize(rank); + for (int i = 0; i < rank; i++) { diagRanks[i].resize(rank, 0.0); } + count = 0; + for (set::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { diagRanks[count][count] = 1.0 / d[*it]; count++; } + + scaling = linear.matrix_mult(linear.matrix_mult(scaling, v), diagRanks); //([numOTUS][numOTUS]*[numOTUS]["good" columns]) = [numOTUS]["good" columns] then ([numOTUS]["good" columns] * ["good" columns]["good" columns] = scaling = [numOTUS]["good" columns] + + /*cout << "scaling = " << endl; + for (int i = 0; i < scaling.size(); i++) { + for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; } + cout << endl; + }*/ + + //Note: linear.matrix_mult [1][numGroups] * [numGroups][numOTUs] - columns in first must match rows in second, returns matrix[1][numOTUs] + vector< vector > prior; prior.push_back(proportions); + vector< vector > xbar = linear.matrix_mult(prior, means); + vector xBar = xbar[0]; //length numOTUs + + /*cout << "xbar" << endl; + for (int j = 0; j < numOtus; j++) { cout << xBar[j] <<'\t'; }cout << endl;*/ + //fac <- 1/(ng - 1) + fac = 1 / (double) (numGroups-1); + //scale(group.means, center = xbar, scale = FALSE) %*% scaling + vector< vector > scaledMeans = means; //[numGroups][numOTUs] + for (int i = 0; i < numGroups; i++) { + for (int j = 0; j < numOtus; j++) { scaledMeans[i][j] -= xBar[j]; } + } + scaledMeans = linear.matrix_mult(scaledMeans, scaling); //[numGroups][numOTUS]*[numOTUS]["good"columns] = [numGroups]["good"columns] + + + //sqrt((n * prior) * fac) + vector temp = proportions; //[numGroups] + for (int i = 0; i < temp.size(); i++) { temp[i] *= numSampled * fac; temp[i] = sqrt(temp[i]); } + + //X <- sqrt((n * prior) * fac) * (scale(group.means, center = xbar, scale = FALSE) %*% scaling) + //X <- temp * scaledMeans + X.clear(); X = scaledMeans; //[numGroups]["good"columns] + for (int i = 0; i < X.size(); i++) { + for (int j = 0; j < X[i].size(); j++) { X[i][j] *= temp[j]; } + } + /* + cout << "X = " << endl; + for (int i = 0; i < X.size(); i++) { + for (int j = 0; j < X[i].size(); j++) { cout << X[i][j] << '\t'; } + cout << endl; + } + */ + + d.clear(); v.clear(); + //we want to transpose so results are in Xcopy, but if that makes rows > columns then we don't since svd requires rows < cols. 
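+        //second svd: the first svd sphered the within-class scatter (folded into scaling);
+        //this one decomposes the scaled, centered class means to get the discriminant axes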
+ transpose=false; + if (X.size() > X[0].size()) { Xcopy = X; transpose=true; } + else { Xcopy = linear.transpose(X); } + linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below + /*cout << "Xcopy = " << endl; + for (int i = 0; i < Xcopy.size(); i++) { + for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; } + cout << endl; + } + + cout << "v = " << endl; + for (int i = 0; i < v.size(); i++) { + for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; } + cout << endl; + } + + cout << "d = " << endl; + for (int i = 0; i < d.size(); i++) { cout << d[i] << endl; }*/ + + //rank <- sum(X.s$d > tol * X.s$d[1L]) + //X.s$d[1L] = larger value in d vector + double largeD = m->max(d); + rank = 0; goodColumns.clear(); + for (int i = 0; i < d.size(); i++) { if (d[i] > (0.0000000001*largeD)) { rank++; goodColumns.insert(i); } } + + if (rank == 0) { + ignore=true;//m->mothurOut("[ERROR]: rank = 0: class means are numerically identical.\n"); m->control_pressed = true; + return scaling; } + + if (transpose) { Xcopy = linear.transpose(v); } + //scaling <- scaling %*% X.s$v[, 1L:rank] - scaling * "good" columns + v.clear(); //store "good" columns - X.s$v[, 1L:rank] + v.resize(Xcopy.size()); //Xcopy = ["good"columns][numGroups] + for (set::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { + for (int i = 0; i < Xcopy.size(); i++) { + v[i].push_back(Xcopy[i][*it]); + } + } + + scaling = linear.matrix_mult(scaling, v); //[numOTUS]["good" columns] * ["good"columns][new "good" columns] + + /*cout << "scaling = " << endl; + for (int i = 0; i < scaling.size(); i++) { + for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; } + cout << endl; + }*/ + ignore=false; + return scaling; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "lda"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//Singular value decomposition (SVD) - adapted from http://svn.lirec.eu/libs/magicsquares/src/SVD.cpp +/* + * svdcomp - SVD decomposition routine. + * Takes an mxn matrix a and decomposes it into udv, where u,v are + * left and right orthogonal transformation matrices, and d is a + * diagonal matrix of singular values. + * + * This routine is adapted from svdecomp.c in XLISP-STAT 2.1 which is + * code from Numerical Recipes adapted by Luke Tierney and David Betz. 
+ * + * Input to dsvd is as follows: + * a = mxn matrix to be decomposed, gets overwritten with u + * m = row dimension of a + * n = column dimension of a + * w = returns the vector of singular values of a + * v = returns the right orthogonal transformation matrix + */ + +int LinearAlgebra::svd(vector< vector >& a, vector& w, vector< vector >& v) { + try { + int flag, i, its, j, jj, k, l, nm; + double c, f, h, s, x, y, z; + double anorm = 0.0, g = 0.0, scale = 0.0; + + int numRows = a.size(); if (numRows == 0) { return 0; } + int numCols = a[0].size(); + w.resize(numCols, 0.0); + v.resize(numCols); for (int i = 0; i < numCols; i++) { v[i].resize(numRows, 0.0); } + + vector rv1; rv1.resize(numCols, 0.0); + if (numRows < numCols){ m->mothurOut("[ERROR]: numRows < numCols\n"); m->control_pressed = true; return 0; } + + /* Householder reduction to bidiagonal form */ + for (i = 0; i < numCols; i++) + { + /* left-hand reduction */ + l = i + 1; + rv1[i] = scale * g; + g = s = scale = 0.0; + if (i < numRows) + { + for (k = i; k < numRows; k++) + scale += fabs((double)a[k][i]); + if (scale) + { + for (k = i; k < numRows; k++) + { + a[k][i] = (double)((double)a[k][i]/scale); + s += ((double)a[k][i] * (double)a[k][i]); + } + f = (double)a[i][i]; + g = -SIGN(sqrt(s), f); + h = f * g - s; + a[i][i] = (double)(f - g); + if (i != numCols - 1) + { + for (j = l; j < numCols; j++) + { + for (s = 0.0, k = i; k < numRows; k++) + s += ((double)a[k][i] * (double)a[k][j]); + f = s / h; + for (k = i; k < numRows; k++) + a[k][j] += (double)(f * (double)a[k][i]); + } + } + for (k = i; k < numRows; k++) + a[k][i] = (double)((double)a[k][i]*scale); + } + } + w[i] = (double)(scale * g); + + /* right-hand reduction */ + g = s = scale = 0.0; + if (i < numRows && i != numCols - 1) + { + for (k = l; k < numCols; k++) + scale += fabs((double)a[i][k]); + if (scale) + { + for (k = l; k < numCols; k++) + { + a[i][k] = (double)((double)a[i][k]/scale); + s += ((double)a[i][k] * (double)a[i][k]); + } + f = (double)a[i][l]; + g = -SIGN(sqrt(s), f); + h = f * g - s; + a[i][l] = (double)(f - g); + for (k = l; k < numCols; k++) + rv1[k] = (double)a[i][k] / h; + if (i != numRows - 1) + { + for (j = l; j < numRows; j++) + { + for (s = 0.0, k = l; k < numCols; k++) + s += ((double)a[j][k] * (double)a[i][k]); + for (k = l; k < numCols; k++) + a[j][k] += (double)(s * rv1[k]); + } + } + for (k = l; k < numCols; k++) + a[i][k] = (double)((double)a[i][k]*scale); + } + } + anorm = max(anorm, (fabs((double)w[i]) + fabs(rv1[i]))); + } + + /* accumulate the right-hand transformation */ + for (i = numCols - 1; i >= 0; i--) + { + if (i < numCols - 1) + { + if (g) + { + for (j = l; j < numCols; j++) + v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g); + /* double division to avoid underflow */ + for (j = l; j < numCols; j++) + { + for (s = 0.0, k = l; k < numCols; k++) + s += ((double)a[i][k] * (double)v[k][j]); + for (k = l; k < numCols; k++) + v[k][j] += (double)(s * (double)v[k][i]); + } + } + for (j = l; j < numCols; j++) + v[i][j] = v[j][i] = 0.0; + } + v[i][i] = 1.0; + g = rv1[i]; + l = i; + } + + /* accumulate the left-hand transformation */ + for (i = numCols - 1; i >= 0; i--) + { + l = i + 1; + g = (double)w[i]; + if (i < numCols - 1) + for (j = l; j < numCols; j++) + a[i][j] = 0.0; + if (g) + { + g = 1.0 / g; + if (i != numCols - 1) + { + for (j = l; j < numCols; j++) + { + for (s = 0.0, k = l; k < numRows; k++) + s += ((double)a[k][i] * (double)a[k][j]); + f = (s / (double)a[i][i]) * g; + for (k = i; k < numRows; k++) + 
a[k][j] += (double)(f * (double)a[k][i]); + } + } + for (j = i; j < numRows; j++) + a[j][i] = (double)((double)a[j][i]*g); + } + else + { + for (j = i; j < numRows; j++) + a[j][i] = 0.0; + } + ++a[i][i]; + } + + /* diagonalize the bidiagonal form */ + for (k = numCols - 1; k >= 0; k--) + { /* loop over singular values */ + for (its = 0; its < 30; its++) + { /* loop over allowed iterations */ + flag = 1; + for (l = k; l >= 0; l--) + { /* test for splitting */ + nm = l - 1; + if (fabs(rv1[l]) + anorm == anorm) + { + flag = 0; + break; + } + if (fabs((double)w[nm]) + anorm == anorm) + break; + } + if (flag) + { + c = 0.0; + s = 1.0; + for (i = l; i <= k; i++) + { + f = s * rv1[i]; + if (fabs(f) + anorm != anorm) + { + g = (double)w[i]; + h = pythag(f, g); + w[i] = (double)h; + h = 1.0 / h; + c = g * h; + s = (- f * h); + for (j = 0; j < numRows; j++) + { + y = (double)a[j][nm]; + z = (double)a[j][i]; + a[j][nm] = (double)(y * c + z * s); + a[j][i] = (double)(z * c - y * s); + } + } + } + } + z = (double)w[k]; + if (l == k) + { /* convergence */ + if (z < 0.0) + { /* make singular value nonnegative */ + w[k] = (double)(-z); + for (j = 0; j < numCols; j++) + v[j][k] = (-v[j][k]); + } + break; + } + if (its >= 30) { + m->mothurOut("No convergence after 30,000! iterations \n"); m->control_pressed = true; + return(0); + } + + /* shift from bottom 2 x 2 minor */ + x = (double)w[l]; + nm = k - 1; + y = (double)w[nm]; + g = rv1[nm]; + h = rv1[k]; + f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); + g = pythag(f, 1.0); + f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x; + + /* next QR transformation */ + c = s = 1.0; + for (j = l; j <= nm; j++) + { + i = j + 1; + g = rv1[i]; + y = (double)w[i]; + h = s * g; + g = c * g; + z = pythag(f, h); + rv1[j] = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y = y * c; + for (jj = 0; jj < numCols; jj++) + { + x = (double)v[jj][j]; + z = (double)v[jj][i]; + v[jj][j] = (float)(x * c + z * s); + v[jj][i] = (float)(z * c - x * s); + } + z = pythag(f, h); + w[j] = (float)z; + if (z) + { + z = 1.0 / z; + c = f * z; + s = h * z; + } + f = (c * g) + (s * y); + x = (c * y) - (s * g); + for (jj = 0; jj < numRows; jj++) + { + y = (double)a[jj][j]; + z = (double)a[jj][i]; + a[jj][j] = (double)(y * c + z * s); + a[jj][i] = (double)(z * c - y * s); + } + } + rv1[l] = 0.0; + rv1[k] = f; + w[k] = (double)x; + } + } + + return(0); + + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "svd"); + exit(1); + } +} + +
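A hedged usage sketch of the svd routine above. It assumes a mothur build where linearalgebra.h and its dependencies are available, and that the template arguments stripped by the HTML extraction are double, i.e. svd(vector<vector<double> >&, vector<double>&, vector<vector<double> >&). Per the checks in the routine, the input needs at least as many rows as columns; a is overwritten with the left singular vectors, w receives the singular values, and v the right singular vectors, so the original entries satisfy a[i][j] ~ sum over k of u[i][k]*w[k]*v[j][k].

#include "linearalgebra.h"
#include <vector>
using namespace std;

void svdExample() {
    LinearAlgebra linear;                                    // same usage as inside lda() above
    vector< vector<double> > a(3, vector<double>(2, 0.0));   // 3 rows x 2 columns, rows >= cols
    a[0][0] = 1; a[0][1] = 2;
    a[1][0] = 3; a[1][1] = 4;
    a[2][0] = 5; a[2][1] = 6;
    vector<double> w;                                        // singular values
    vector< vector<double> > v;                              // right singular vectors
    linear.svd(a, w, v);                                     // a now holds the left singular vectors u
    // reconstruction: original a[i][j] is approximately sum_k a[i][k] * w[k] * v[j][k]
}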