5 * Created by westcott on 1/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "linearalgebra.h"
13 #define PI 3.1415926535897932384626433832795
15 // This class references functions used from "Numerical Recipes in C++" //
17 /*********************************************************************************************************************************/
// SQR: square of a (Numerical Recipes helper).
// NOTE(review): the body lines are missing from this excerpt — presumably returns a*a; confirm against full source.
18 inline double SQR(const double a)
22 /*********************************************************************************************************************************/
// SIGN: returns the magnitude of a carrying the sign of b (Numerical Recipes SIGN helper).
24 inline double SIGN(const double a, const double b)
26 return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
28 /*********************************************************************************************************************************/
29 //Numerical Recipes pg. 245 - Returns the complementary error function erfc(x) with fractional error everywhere less than 1.2 × 10^-7.
// erfcc: complementary error function erfc(x) via the Chebyshev fit of NR pg. 245.
// NOTE(review): the declarations of t/z/ans and the try block are missing from this excerpt.
30 double LinearAlgebra::erfcc(double x){
36 ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
37 t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
38 t*(-0.82215223+t*0.17087277)))))))));
40 //cout << "in erfcc " << t << '\t' << ans<< endl;
// The fit is for non-negative argument; erfc(-x) = 2 - erfc(x) restores the negative side.
41 return (x >= 0.0 ? ans : 2.0 - ans);
// NOTE(review): error handler names "betai" but this function is erfcc — likely copy/paste slip; confirm and fix in full source.
44 m->errorOut(e, "LinearAlgebra", "betai");
48 /*********************************************************************************************************************************/
49 //Numerical Recipes pg. 232
// betai: incomplete beta function I_x(a,b), NR pg. 232. On invalid x sets control_pressed and returns 0.
50 double LinearAlgebra::betai(const double a, const double b, const double x) {
55 if (x < 0.0 || x > 1.0) { m->mothurOut("[ERROR]: bad x in betai.\n"); m->control_pressed = true; return 0.0; }
// Prefactor bt = x^a (1-x)^b / B(a,b), computed in log space to avoid overflow.
57 if (x == 0.0 || x == 1.0) { bt = 0.0; }
58 else { bt = exp(gammln(a+b)-gammln(a)-gammln(b)+a*log(x)+b*log(1.0-x)); }
// Use the continued fraction directly, or via the symmetry I_x(a,b) = 1 - I_{1-x}(b,a), whichever converges faster.
60 if (x < (a+1.0) / (a+b+2.0)) { result = bt*betacf(a,b,x)/a; }
61 else { result = 1.0-bt*betacf(b,a,1.0-x)/b; }
66 m->errorOut(e, "LinearAlgebra", "betai");
70 /*********************************************************************************************************************************/
71 //Numerical Recipes pg. 219
// gammln: ln(Gamma(xx)) via the Lanczos approximation, NR pg. 219.
// NOTE(review): the lines initializing x/y/tmp/ser and the series loop are missing from this excerpt.
72 double LinearAlgebra::gammln(const double xx) {
76 static const double cof[6]={76.18009172947146,-86.50532032941677,24.01409824083091,
77 -1.231739572450155,0.120858003e-2,-0.536382e-5};
81 tmp -= (x+0.5)*log(tmp);
// 2.5066282746310005 is sqrt(2*pi) from the Lanczos formula.
86 return -tmp+log(2.5066282746310005*ser/x);
89 m->errorOut(e, "LinearAlgebra", "gammln");
93 /*********************************************************************************************************************************/
94 //Numerical Recipes pg. 223
// gammp: incomplete gamma function P(a,x), NR pg. 223. Evaluates via the series (gser)
// or continued fraction (gcf) representation; the selection branch is on lines missing from this excerpt.
95 double LinearAlgebra::gammp(const double a, const double x) {
97 double gamser,gammcf,gln;
99 if (x < 0.0 || a <= 0.0) { m->mothurOut("[ERROR]: Invalid arguments in routine GAMMP\n"); m->control_pressed = true; return 0.0;}
101 gser(gamser,a,x,gln);
110 catch(exception& e) {
111 m->errorOut(e, "LinearAlgebra", "gammp");
115 /*********************************************************************************************************************************/
116 //Numerical Recipes pg. 223
117 /*double LinearAlgebra::gammq(const double a, const double x) {
119 double gamser,gammcf,gln;
121 if (x < 0.0 || a <= 0.0) { m->mothurOut("[ERROR]: Invalid arguments in routine GAMMQ\n"); m->control_pressed = true; return 0.0; }
123 gser(gamser,a,x,gln);
132 catch(exception& e) {
133 m->errorOut(e, "LinearAlgebra", "gammq");
137 *********************************************************************************************************************************/
138 //Numerical Recipes pg. 224
// gcf: continued-fraction evaluation of the incomplete gamma function Q(a,x) by
// modified Lentz's method, NR pg. 224. Result is stored into gammcf; gln receives ln(Gamma(a)).
139 double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
142 const double EPS=numeric_limits<double>::epsilon();
// FPMIN: number near the smallest representable double, used to rescue near-zero denominators.
143 const double FPMIN=numeric_limits<double>::min()/EPS;
145 double an,b,c,d,del,h;
152 for (i=1;i<=ITMAX;i++) {
156 if (fabs(d) < FPMIN) { d=FPMIN; }
158 if (fabs(c) < FPMIN) { c=FPMIN; }
// Converged when the multiplicative update del is within machine epsilon of 1.
162 if (fabs(del-1.0) <= EPS) break;
164 if (i > ITMAX) { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->control_pressed = true; }
// Attach the prefactor x^a e^{-x} / Gamma(a) to the continued-fraction value h.
165 gammcf=exp(-x+a*log(x)-gln)*h;
169 catch(exception& e) {
170 m->errorOut(e, "LinearAlgebra", "gcf");
175 /*********************************************************************************************************************************/
176 //Numerical Recipes pg. 223
// gser: series representation of the incomplete gamma function P(a,x), NR pg. 223.
// Result stored into gamser; gln receives ln(Gamma(a)).
177 double LinearAlgebra::gser(double& gamser, const double a, const double x, double& gln) {
181 const double EPS = numeric_limits<double>::epsilon();
185 if (x < 0.0) { m->mothurOut("[ERROR]: x less than 0 in routine GSER\n"); m->control_pressed = true; }
// NOTE(review): as excerpted this line runs unconditionally (gamser=0.0; return 0.0;).
// The braces of an x <= 0 guard are presumably on the missing lines — confirm against full source.
186 gamser=0.0; return 0.0;
190 for (n=0;n<100;n++) {
// Converged when the latest series term is below machine epsilon relative to the running sum.
194 if (fabs(del) < fabs(sum)*EPS) {
195 gamser=sum*exp(-x+a*log(x)-gln);
200 m->mothurOut("[ERROR]: a too large, ITMAX too small in routine GSER\n");
205 catch(exception& e) {
206 m->errorOut(e, "LinearAlgebra", "gser");
210 /*********************************************************************************************************************************/
211 //Numerical Recipes pg. 233
// betacf: continued fraction for the incomplete beta function, evaluated by
// modified Lentz's method with alternating even/odd steps, NR pg. 233.
212 double LinearAlgebra::betacf(const double a, const double b, const double x) {
214 const int MAXIT = 100;
215 const double EPS = numeric_limits<double>::epsilon();
// FPMIN rescues denominators that would otherwise underflow to zero.
216 const double FPMIN = numeric_limits<double>::min() / EPS;
218 double aa, c, d, del, h, qab, qam, qap;
225 if (fabs(d) < FPMIN) d=FPMIN;
228 for (m1=1;m1<=MAXIT;m1++) {
// Even step of the continued fraction recurrence.
230 aa=m1*(b-m1)*x/((qam+m2)*(a+m2));
232 if (fabs(d) < FPMIN) d=FPMIN;
234 if (fabs(c) < FPMIN) c=FPMIN;
// Odd step of the continued fraction recurrence.
237 aa = -(a+m1)*(qab+m1)*x/((a+m2)*(qap+m2));
239 if (fabs(d) < FPMIN) d=FPMIN;
241 if (fabs(c) < FPMIN) c=FPMIN;
245 if (fabs(del-1.0) < EPS) break;
248 if (m1 > MAXIT) { m->mothurOut("[ERROR]: a or b too big or MAXIT too small in betacf."); m->mothurOutEndLine(); m->control_pressed = true; }
252 catch(exception& e) {
253 m->errorOut(e, "LinearAlgebra", "betacf");
257 /*********************************************************************************************************************************/
258 //[3][4] * [4][5] - columns in first must match rows in second, returns matrix[3][5]
// matrix_mult: naive O(r*c*k) matrix product. Columns of first must equal rows of second.
// NOTE(review): first[0]/second[0] are dereferenced unguarded — assumes both matrices are non-empty
// and rectangular; empty input would be undefined behavior. Confirm callers guarantee this.
259 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
261 vector<vector<double> > product;
263 int first_rows = first.size();
264 int first_cols = first[0].size();
265 int second_cols = second[0].size();
267 product.resize(first_rows);
268 for(int i=0;i<first_rows;i++){
269 product[i].resize(second_cols);
272 for(int i=0;i<first_rows;i++){
273 for(int j=0;j<second_cols;j++){
// Honor user cancellation; returns the partially filled product.
275 if (m->control_pressed) { return product; }
278 for(int k=0;k<first_cols;k++){
279 product[i][j] += first[i][k] * second[k][j];
286 catch(exception& e) {
287 m->errorOut(e, "LinearAlgebra", "matrix_mult");
292 /*********************************************************************************************************************************/
// transpose: returns the transpose of matrix (cols x rows).
// NOTE(review): matrix[0] is dereferenced unguarded — assumes a non-empty, rectangular input.
294 vector<vector<double> > LinearAlgebra::transpose(vector<vector<double> >matrix){
296 vector<vector<double> > trans; trans.resize(matrix[0].size());
297 for (int i = 0; i < trans.size(); i++) {
298 for (int j = 0; j < matrix.size(); j++) { trans[i].push_back(matrix[j][i]); }
303 catch(exception& e) {
304 m->errorOut(e, "LinearAlgebra", "transpose");
309 /*********************************************************************************************************************************/
// recenter: Gower double-centering for PCoA. Builds A = -1/2 * D∘D + offset and the
// centering matrix C = I - (1/n) 11^T, then computes G = C * A * C.
// NOTE(review): the derivation of rank and the inner resize of A[i]/C[i] are on lines missing from this excerpt.
311 void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
315 vector<vector<double> > A(rank);
316 vector<vector<double> > C(rank);
317 for(int i=0;i<rank;i++){
// Off-diagonal entries of the centering matrix are all -1/n; diagonal is 1 - 1/n.
322 double scale = -1.0000 / (double) rank;
324 for(int i=0;i<rank;i++){
326 C[i][i] = 1.0000 + scale;
327 for(int j=i+1;j<rank;j++){
// A is symmetric: -1/2 * squared distance, shifted by offset.
328 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
329 C[i][j] = C[j][i] = scale;
333 A = matrix_mult(C,A);
334 G = matrix_mult(A,C);
336 catch(exception& e) {
337 m->errorOut(e, "LinearAlgebra", "recenter");
342 /*********************************************************************************************************************************/
344 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
// tred2: Householder reduction of a real symmetric matrix a to tridiagonal form,
// NR 2nd ed. pg. 479. On return d holds the diagonal, e the off-diagonal, and a is
// overwritten with the orthogonal transform used later by the QL routine.
// NOTE(review): numerous lines (n setup, scale/h accumulation, second-phase setup) are missing from this excerpt.
346 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
348 double scale, hh, h, g, f;
355 for(int i=n-1;i>0;i--){
// Scale the row to avoid under/overflow when forming the Householder vector.
359 for(int k=0;k<l+1;k++){
360 scale += fabs(a[i][k]);
366 for(int k=0;k<l+1;k++){
368 h += a[i][k] * a[i][k];
// Choose the sign that avoids cancellation: g = -sign(f)*sqrt(h).
371 g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
376 for(int j=0;j<l+1;j++){
377 a[j][i] = a[i][j] / h;
379 for(int k=0;k<j+1;k++){
380 g += a[j][k] * a[i][k];
382 for(int k=j+1;k<l+1;k++){
383 g += a[k][j] * a[i][k];
389 for(int j=0;j<l+1;j++){
391 e[j] = g = e[j] - hh * f;
392 for(int k=0;k<j+1;k++){
393 a[j][k] -= (f * e[k] + g * a[i][k]);
// Second phase: accumulate the transformation matrix into a.
408 for(int i=0;i<n;i++){
411 for(int j=0;j<l;j++){
413 for(int k=0;k<l;k++){
414 g += a[i][k] * a[k][j];
416 for(int k=0;k<l;k++){
417 a[k][j] -= g * a[k][i];
423 for(int j=0;j<l;j++){
424 a[j][i] = a[i][j] = 0.0;
430 catch(exception& e) {
431 m->errorOut(e, "LinearAlgebra", "tred2");
436 /*********************************************************************************************************************************/
438 double LinearAlgebra::pythag(double a, double b) { return(pow(a*a+b*b,0.5)); }
440 /*********************************************************************************************************************************/
442 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
// qtli: QL algorithm with implicit shifts for a symmetric tridiagonal matrix (d = diagonal,
// e = off-diagonal from tred2), NR 2nd ed. pg. 479. Eigenvalues land in d; eigenvector
// matrix z is updated in place. The final loops sort eigenpairs.
// NOTE(review): the function is named qtli but the NR routine (and the warning text below) is "tqli" —
// likely a transposition in the name; confirm against the header before renaming.
444 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
447 double s, r, p, g, f, dd, c, b;
450 for(int i=1;i<=n;i++){
455 for(int l=0;l<n;l++){
// Look for a single small off-diagonal element to split the matrix.
458 for(myM=l;myM<n-1;myM++){
459 dd = fabs(d[myM]) + fabs(d[myM+1]);
460 if(fabs(e[myM])+dd == dd) break;
// NOTE(review): iteration cap only warns to cerr; it does not abort or set control_pressed.
463 if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
// Form the implicit shift from the 2x2 block at the split point.
464 g = (d[l+1]-d[l]) / (2.0 * e[l]);
466 g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
469 for(i=myM-1;i>=l;i--){
472 e[i+1] = (r=pythag(f,g));
481 r = (d[i] - g) * s + 2.0 * c * b;
482 d[i+1] = g + ( p = s * r);
// Accumulate the plane rotation into the eigenvector matrix.
484 for(int k=0;k<n;k++){
486 z[k][i+1] = s * z[k][i] + c * f;
487 z[k][i] = c * z[k][i] - s * f;
490 if(r == 0.00 && i >= l) continue;
// Sort eigenvalues (and matching eigenvectors) by selection.
499 for(int i=0;i<n;i++){
501 for(int j=i;j<n;j++){
509 for(int j=0;j<n;j++){
519 catch(exception& e) {
520 m->errorOut(e, "LinearAlgebra", "qtli");
524 /*********************************************************************************************************************************/
525 //groups by dimension
// calculateEuclidianDistance (groups x dimensions layout): pairwise Euclidean distance
// matrix over the first `dimensions` coordinates of each row of axes. Returns a symmetric
// axes.size() x axes.size() matrix with zero diagonal.
526 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
529 vector< vector<double> > dists; dists.resize(axes.size());
530 for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); }
532 if (dimensions == 1) { //one dimension calc = abs(x-y)
534 for (int i = 0; i < dists.size(); i++) {
// Honor user cancellation; returns the partially filled matrix.
536 if (m->control_pressed) { return dists; }
538 for (int j = 0; j < i; j++) {
// NOTE(review): abs() on a double — relies on the std::abs(double) overload being in scope;
// if only C's int abs is visible this silently truncates. Confirm headers in full source.
539 dists[i][j] = abs(axes[i][0] - axes[j][0]);
540 dists[j][i] = dists[i][j];
544 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
546 for (int i = 0; i < dists.size(); i++) {
548 if (m->control_pressed) { return dists; }
550 for (int j = 0; j < i; j++) {
552 for (int k = 0; k < dimensions; k++) {
553 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
556 dists[i][j] = sqrt(sum);
557 dists[j][i] = dists[i][j];
565 catch(exception& e) {
566 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
571 //returns groups by dimensions from dimensions by groups
// calculateEuclidianDistance (dimensions x groups layout): same computation as the overload
// above but with axes transposed — axes[k][i] is coordinate k of group i. Uses all axes.size()
// dimensions. Returns a symmetric matrix sized by the number of groups (axes[0].size()).
// NOTE(review): axes[0] is dereferenced unguarded — assumes axes is non-empty.
572 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
575 vector< vector<double> > dists; dists.resize(axes[0].size());
576 for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes[0].size(), 0.0); }
578 if (axes.size() == 1) { //one dimension calc = abs(x-y)
580 for (int i = 0; i < dists.size(); i++) {
582 if (m->control_pressed) { return dists; }
584 for (int j = 0; j < i; j++) {
585 dists[i][j] = abs(axes[0][i] - axes[0][j]);
586 dists[j][i] = dists[i][j];
590 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
592 for (int i = 0; i < dists[0].size(); i++) {
594 if (m->control_pressed) { return dists; }
596 for (int j = 0; j < i; j++) {
598 for (int k = 0; k < axes.size(); k++) {
599 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
602 dists[i][j] = sqrt(sum);
603 dists[j][i] = dists[i][j];
611 catch(exception& e) {
612 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
616 /*********************************************************************************************************************************/
617 //assumes both matrices are square and the same size
// calcPearson: Pearson correlation between the strict-lower-triangle entries of two
// square distance matrices (Mantel-style comparison of a Euclidean-dist matrix vs a user matrix).
// NOTE(review): the declaration/reset of `count` is on lines missing from this excerpt — presumably
// it tallies lower-triangle cells; confirm it is re-zeroed between the two averaging loops.
618 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
621 //find average for - X
// NOTE(review): float accumulators over double matrices lose precision for large inputs.
623 float averageEuclid = 0.0;
624 for (int i = 0; i < euclidDists.size(); i++) {
625 for (int j = 0; j < i; j++) {
626 averageEuclid += euclidDists[i][j];
630 averageEuclid = averageEuclid / (float) count;
632 //find average for - Y
634 float averageUser = 0.0;
635 for (int i = 0; i < userDists.size(); i++) {
636 for (int j = 0; j < i; j++) {
637 averageUser += userDists[i][j];
641 averageUser = averageUser / (float) count;
// Standard Pearson form: sum((X-meanX)(Y-meanY)) / (sqrt(sum(X-meanX)^2) * sqrt(sum(Y-meanY)^2)).
643 double numerator = 0.0;
644 double denomTerm1 = 0.0;
645 double denomTerm2 = 0.0;
647 for (int i = 0; i < euclidDists.size(); i++) {
649 for (int k = 0; k < i; k++) { //just lt dists
651 float Yi = userDists[i][k];
652 float Xi = euclidDists[i][k];
654 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
655 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
656 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
660 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
661 double r = numerator / denom;
663 //divide by zero error
// Constant matrices make the denominator zero; report r = 0 instead of NaN/Inf.
664 if (isnan(r) || isinf(r)) { r = 0.0; }
669 catch(exception& e) {
670 m->errorOut(e, "LinearAlgebra", "calcPearson");
674 /*********************************************************************************************************************************/
675 //assumes both matrices are square and the same size
// calcSpearman: Spearman rank correlation between the strict-lower-triangle entries of two
// square distance matrices, with tie corrections Lx/Ly (sum (t^3 - t)/12 over tie groups).
// Final statistic: r = (SX2 + SY2 - sum d_i^2) / (2*sqrt(SX2*SY2)) where SX2 = (n^3-n)/12 - Lx.
676 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
// tableX counts how many times each X value occurs (for the tie correction).
681 map<float, int> tableX;
682 map<float, int>::iterator itTable;
683 vector<spearmanRank> scores;
685 for (int i = 0; i < euclidDists.size(); i++) {
686 for (int j = 0; j < i; j++) {
// Each lower-triangle cell becomes a named score; the name is its insertion index.
687 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
688 scores.push_back(member);
690 //count number of repeats
691 itTable = tableX.find(euclidDists[i][j]);
692 if (itTable == tableX.end()) {
693 tableX[euclidDists[i][j]] = 1;
695 tableX[euclidDists[i][j]]++;
701 sort(scores.begin(), scores.end(), compareSpearman);
// Tie correction for X: Lx = sum over tie groups of (t^3 - t)/12.
705 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
706 double tx = (double) itTable->second;
707 Lx += ((pow(tx, 3.0) - tx) / 12.0);
711 map<string, float> rankEuclid;
712 vector<spearmanRank> ties;
// Assign average ranks to runs of tied scores.
714 for (int j = 0; j < scores.size(); j++) {
716 ties.push_back(scores[j]);
718 if (j != (scores.size()-1)) { // you are not the last so you can look ahead
719 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
721 for (int k = 0; k < ties.size(); k++) {
722 float thisrank = rankTotal / (float) ties.size();
723 rankEuclid[ties[k].name] = thisrank;
728 }else { // you are the last one
730 for (int k = 0; k < ties.size(); k++) {
731 float thisrank = rankTotal / (float) ties.size();
732 rankEuclid[ties[k].name] = thisrank;
// Repeat the same tabulation/ranking for the Y (user) matrix.
// NOTE(review): `scores` is reused here — presumably cleared on a line missing from this excerpt; confirm.
739 map<float, int> tableY;
742 for (int i = 0; i < userDists.size(); i++) {
743 for (int j = 0; j < i; j++) {
744 spearmanRank member(toString(scores.size()), userDists[i][j]);
745 scores.push_back(member);
747 //count number of repeats
748 itTable = tableY.find(userDists[i][j]);
749 if (itTable == tableY.end()) {
750 tableY[userDists[i][j]] = 1;
752 tableY[userDists[i][j]]++;
758 sort(scores.begin(), scores.end(), compareSpearman);
762 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
763 double ty = (double) itTable->second;
764 Ly += ((pow(ty, 3.0) - ty) / 12.0);
768 map<string, float> rankUser;
771 for (int j = 0; j < scores.size(); j++) {
773 ties.push_back(scores[j]);
775 if (j != (scores.size()-1)) { // you are not the last so you can look ahead
776 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
778 for (int k = 0; k < ties.size(); k++) {
779 float thisrank = rankTotal / (float) ties.size();
780 rankUser[ties[k].name] = thisrank;
785 }else { // you are the last one
787 for (int k = 0; k < ties.size(); k++) {
788 float thisrank = rankTotal / (float) ties.size();
789 rankUser[ties[k].name] = thisrank;
// Sum of squared rank differences, matched by cell index.
797 for (int i = 0; i < userDists.size(); i++) {
798 for (int j = 0; j < i; j++) {
800 float xi = rankEuclid[toString(count)];
801 float yi = rankUser[toString(count)];
803 di += ((xi - yi) * (xi - yi));
809 double n = (double) count;
811 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
812 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
814 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
816 //divide by zero error
817 if (isnan(r) || isinf(r)) { r = 0.0; }
822 catch(exception& e) {
823 m->errorOut(e, "LinearAlgebra", "calcSpearman");
828 /*********************************************************************************************************************************/
829 //assumes both matrices are square and the same size
// calcKendall (matrix overload): Kendall's tau between the strict-lower-triangle entries of two
// square distance matrices. Ranks each set (average rank for ties), orders by X rank, then counts
// concordant/discordant pairs of Y ranks to form tau = (C - D) / count.
830 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
835 vector<spearmanRank> scores;
836 for (int i = 0; i < euclidDists.size(); i++) {
837 for (int j = 0; j < i; j++) {
// Cell name is its insertion index so X and Y entries can be matched later.
838 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
839 scores.push_back(member);
844 sort(scores.begin(), scores.end(), compareSpearman);
// Average-rank assignment for runs of tied X scores.
847 map<string, float> rankEuclid;
848 vector<spearmanRank> ties;
850 for (int j = 0; j < scores.size(); j++) {
852 ties.push_back(scores[j]);
854 if (j != (scores.size()-1)) { // you are not the last so you can look ahead
855 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
857 for (int k = 0; k < ties.size(); k++) {
858 float thisrank = rankTotal / (float) ties.size();
859 rankEuclid[ties[k].name] = thisrank;
864 }else { // you are the last one
866 for (int k = 0; k < ties.size(); k++) {
867 float thisrank = rankTotal / (float) ties.size();
868 rankEuclid[ties[k].name] = thisrank;
// Same tabulation for the user matrix.
873 vector<spearmanRank> scoresUser;
874 for (int i = 0; i < userDists.size(); i++) {
875 for (int j = 0; j < i; j++) {
876 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
877 scoresUser.push_back(member);
882 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);
885 map<string, float> rankUser;
888 for (int j = 0; j < scoresUser.size(); j++) {
890 ties.push_back(scoresUser[j]);
892 if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
893 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
895 for (int k = 0; k < ties.size(); k++) {
896 float thisrank = rankTotal / (float) ties.size();
897 rankUser[ties[k].name] = thisrank;
902 }else { // you are the last one
904 for (int k = 0; k < ties.size(); k++) {
905 float thisrank = rankTotal / (float) ties.size();
906 rankUser[ties[k].name] = thisrank;
// Y ranks listed in X-sorted order; discordances are then inversions in this sequence.
915 vector<spearmanRank> user;
916 for (int l = 0; l < scores.size(); l++) {
917 spearmanRank member(scores[l].name, rankUser[scores[l].name]);
918 user.push_back(member);
// O(n^2) concordant/discordant pair count.
922 for (int l = 0; l < scores.size(); l++) {
924 int numWithHigherRank = 0;
925 int numWithLowerRank = 0;
926 float thisrank = user[l].score;
928 for (int u = l+1; u < scores.size(); u++) {
929 if (user[u].score > thisrank) { numWithHigherRank++; }
930 else if (user[u].score < thisrank) { numWithLowerRank++; }
934 numCoor += numWithHigherRank;
935 numDisCoor += numWithLowerRank;
// NOTE(review): the definition of `count` here is on lines missing from this excerpt; confirm it is
// the number of item pairs n(n-1)/2 expected by the tau formula.
938 r = (numCoor - numDisCoor) / (float) count;
940 //divide by zero error
941 if (isnan(r) || isinf(r)) { r = 0.0; }
946 catch(exception& e) {
947 m->errorOut(e, "LinearAlgebra", "calcKendall");
951 /*********************************************************************************************************************************/
// calcKendall (vector overload): Kendall's tau between paired samples x and y, plus a
// two-sided significance estimate written into sig (via calcKendallSig). Same rank-then-count
// strategy as the matrix overload; x ranks are written back into xscores in place.
952 double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double& sig){
954 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
957 vector<spearmanRank> xscores;
958 for (int i = 0; i < x.size(); i++) {
959 spearmanRank member(toString(i), x[i]);
960 xscores.push_back(member);
964 sort(xscores.begin(), xscores.end(), compareSpearman);
966 //convert scores to ranks of x
// Pointers into xscores so tied entries can be overwritten with their average rank.
967 vector<spearmanRank*> ties;
969 for (int j = 0; j < xscores.size(); j++) {
971 ties.push_back(&(xscores[j]));
973 if (j != xscores.size()-1) { // you are not the last so you can look ahead
974 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
975 for (int k = 0; k < ties.size(); k++) {
976 float thisrank = rankTotal / (float) ties.size();
977 (*ties[k]).score = thisrank;
982 }else { // you are the last one
983 for (int k = 0; k < ties.size(); k++) {
984 float thisrank = rankTotal / (float) ties.size();
985 (*ties[k]).score = thisrank;
// Rank y the same way, but keep its ranks in a name -> rank map.
992 vector<spearmanRank> yscores;
993 for (int j = 0; j < y.size(); j++) {
994 spearmanRank member(toString(j), y[j]);
995 yscores.push_back(member);
999 sort(yscores.begin(), yscores.end(), compareSpearman);
1002 map<string, float> rank;
1003 vector<spearmanRank> yties;
1005 for (int j = 0; j < yscores.size(); j++) {
1007 yties.push_back(yscores[j]);
1009 if (j != yscores.size()-1) { // you are not the last so you can look ahead
1010 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
1011 for (int k = 0; k < yties.size(); k++) {
1012 float thisrank = rankTotal / (float) yties.size();
1013 rank[yties[k].name] = thisrank;
1018 }else { // you are the last one
1019 for (int k = 0; k < yties.size(); k++) {
1020 float thisrank = rankTotal / (float) yties.size();
1021 rank[yties[k].name] = thisrank;
// y ranks in x-sorted order; concordant/discordant pairs counted below in O(n^2).
1031 vector<spearmanRank> otus;
1032 for (int l = 0; l < xscores.size(); l++) {
1033 spearmanRank member(xscores[l].name, rank[xscores[l].name]);
1034 otus.push_back(member);
1038 for (int l = 0; l < xscores.size(); l++) {
1040 int numWithHigherRank = 0;
1041 int numWithLowerRank = 0;
1042 float thisrank = otus[l].score;
1044 for (int u = l+1; u < xscores.size(); u++) {
1045 if (otus[u].score > thisrank) { numWithHigherRank++; }
1046 else if (otus[u].score < thisrank) { numWithLowerRank++; }
1050 numCoor += numWithHigherRank;
1051 numDisCoor += numWithLowerRank;
// NOTE(review): `count` is defined on lines missing from this excerpt; presumably n(n-1)/2.
1054 double p = (numCoor - numDisCoor) / (float) count;
1056 sig = calcKendallSig(x.size(), p);
1060 catch(exception& e) {
1061 m->errorOut(e, "LinearAlgebra", "calcKendall");
// ran0: NR minimal linear congruential generator (Park-Miller, IA=16807, IM=2^31-1) using
// Schrage's trick (IQ/IR) to avoid 32-bit overflow. idum is the state, updated in place.
// NOTE(review): the masking with MASK and the AM*idum return are on lines missing from this excerpt.
1065 double LinearAlgebra::ran0(int& idum)
1067 const int IA=16807,IM=2147483647,IQ=127773;
1068 const int IR=2836,MASK=123459876;
1069 const double AM=1.0/double(IM);
// Schrage update: idum = IA*(idum mod IQ) - IR*(idum/IQ), folded back into range if negative.
1075 idum=IA*(idum-k*IQ)-IR*k;
1076 if (idum < 0) idum += IM;
// ran1: Park-Miller generator with a Bays-Durham shuffle table (NTAB entries) to break up
// serial correlations; returns a uniform deviate in (0, RNMX]. idum is the state.
// NOTE(review): static state (iv, iy) makes this non-reentrant and not thread-safe.
1082 double LinearAlgebra::ran1(int &idum)
1084 const int IA=16807,IM=2147483647,IQ=127773,IR=2836,NTAB=32;
1085 const int NDIV=(1+(IM-1)/NTAB);
1086 const double EPS=3.0e-16,AM=1.0/IM,RNMX=(1.0-EPS);
1088 static vector<int> iv(NTAB);
// (Re)initialize the shuffle table on first call or when seeded with idum <= 0.
1092 if (idum <= 0 || !iy) {
1093 if (-idum < 1) idum=1;
1095 for (j=NTAB+7;j>=0;j--) {
1097 idum=IA*(idum-k*IQ)-IR*k;
1098 if (idum < 0) idum += IM;
1099 if (j < NTAB) iv[j] = idum;
// Normal path: Schrage update plus table shuffle.
1104 idum=IA*(idum-k*IQ)-IR*k;
1105 if (idum < 0) idum += IM;
// Clamp so the caller never receives exactly 1.0.
1109 if ((temp=AM*iy) > RNMX) return RNMX;
// ran2: L'Ecuyer long-period generator — two LCGs (IM1, IM2) combined, with a Bays-Durham
// shuffle; period ~ 2.3e18. idum is the primary state; idum2/iv/iy are static.
// NOTE(review): static state makes this non-reentrant and not thread-safe.
1113 double LinearAlgebra::ran2(int &idum)
1115 const int IM1=2147483563,IM2=2147483399;
1116 const int IA1=40014,IA2=40692,IQ1=53668,IQ2=52774;
1117 const int IR1=12211,IR2=3791,NTAB=32,IMM1=IM1-1;
1118 const int NDIV=1+IMM1/NTAB;
1119 const double EPS=3.0e-16,RNMX=1.0-EPS,AM=1.0/double(IM1);
1120 static int idum2=123456789,iy=0;
1121 static vector<int> iv(NTAB);
// Seeding: force idum positive and warm up the shuffle table.
1126 idum=(idum==0 ? 1 : -idum);
1128 for (j=NTAB+7;j>=0;j--) {
1130 idum=IA1*(idum-k*IQ1)-k*IR1;
1131 if (idum < 0) idum += IM1;
1132 if (j < NTAB) iv[j] = idum;
// Advance both component generators via Schrage's method.
1137 idum=IA1*(idum-k*IQ1)-k*IR1;
1138 if (idum < 0) idum += IM1;
1140 idum2=IA2*(idum2-k*IQ2)-k*IR2;
1141 if (idum2 < 0) idum2 += IM2;
1145 if (iy < 1) iy += IMM1;
// Clamp so the caller never receives exactly 1.0.
1146 if ((temp=AM*iy) > RNMX) return RNMX;
// ran3: Knuth's subtractive lagged-Fibonacci generator (table of 55 values, lags 55 and 24).
// Negative idum (or first call) reseeds; returns a uniform deviate in [0,1).
// NOTE(review): static state (ma, inext, inextp, iff) makes this non-reentrant and not thread-safe.
1150 double LinearAlgebra::ran3(int &idum)
1152 static int inext,inextp;
1154 const int MBIG=1000000000,MSEED=161803398,MZ=0;
1155 const double FAC=(1.0/MBIG);
1156 static vector<int> ma(56);
1159 if (idum < 0 || iff == 0) {
// Seed the table from MSEED and the caller's idum.
1161 mj=labs(MSEED-labs(idum));
1165 for (i=1;i<=54;i++) {
1169 if (mk < int(MZ)) mk += MBIG;
// Warm up the table (NR does 4 passes; loop bodies partially missing from this excerpt).
1173 for (i=1;i<=55;i++) {
1174 ma[i] -= ma[1+(i+30) % 55];
1175 if (ma[i] < int(MZ)) ma[i] += MBIG;
// Normal path: subtract the two lagged entries, wrapping the circular indices at 56.
1181 if (++inext == 56) inext=1;
1182 if (++inextp == 56) inextp=1;
1183 mj=ma[inext]-ma[inextp];
1184 if (mj < int(MZ)) mj += MBIG;
1189 double LinearAlgebra::ran4(int &idum)
1191 #if defined(vax) || defined(_vax_) || defined(__vax__) || defined(VAX)
1192 static const unsigned long jflone = 0x00004080;
1193 static const unsigned long jflmsk = 0xffff007f;
1195 static const unsigned long jflone = 0x3f800000;
1196 static const unsigned long jflmsk = 0x007fffff;
1198 unsigned long irword,itemp,lword;
1199 static int idums = 0;
1207 psdes(lword,irword);
1208 itemp=jflone | (jflmsk & irword);
1210 return (*(float *)&itemp)-1.0;
// psdes: NR "pseudo-DES" bit mixer. Performs NITER rounds of a Feistel-like hash over the
// pair (lword, irword), updating both in place; used by ran4 as a random-access hash.
1213 void LinearAlgebra::psdes(unsigned long &lword, unsigned long &irword)
// Round constants (magic values from NR).
1216 static const unsigned long c1[NITER]={
1217 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L};
1218 static const unsigned long c2[NITER]={
1219 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L};
1220 unsigned long i,ia,ib,iswap,itmph=0,itmpl=0;
1222 for (i=0;i<NITER;i++) {
// Mix via XOR with c1, split into 16-bit halves, and recombine nonlinearly.
1223 ia=(iswap=irword) ^ c1[i];
1224 itmpl = ia & 0xffff;
1226 ib=itmpl*itmpl+ ~(itmph*itmph);
1227 irword=lword ^ (((ia = (ib >> 16) |
1228 ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph);
1232 /*********************************************************************************************************************************/
// calcKendallSig: two-sided significance of Kendall's tau r for sample size n using the
// normal approximation: var(tau) = (4n+10)/(9n(n-1)), z = r/sqrt(var), p = erfc(|z|/sqrt(2)).
1233 double LinearAlgebra::calcKendallSig(double n, double r){
1237 double svar=(4.0*n+10.0)/(9.0*n*(n-1.0));
1238 double z= r/sqrt(svar);
// 1.4142136 ~= sqrt(2); erfcc gives the two-tailed p-value of the standard normal z.
1239 sig=erfcc(fabs(z)/1.4142136);
// n <= 1 makes svar undefined; report 0 instead of NaN/Inf.
1241 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
1245 catch(exception& e) {
1246 m->errorOut(e, "LinearAlgebra", "calcKendallSig");
1250 /*********************************************************************************************************************************/
// calcKruskalWallis: Kruskal-Wallis H test over `values`, where each spearmanRank's name is
// its treatment label and score is the observation. Ranks (with average ranks for ties) are
// written back into values; pValue receives the chi-square tail probability with
// (treatments-1) degrees of freedom. Returns H (after tie correction, per the missing lines).
1251 double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pValue){
1254 set<string> treatments;
1257 sort(values.begin(), values.end(), compareSpearman);
1258 vector<spearmanRank*> ties;
// Average-rank assignment; sizes of tie groups > 1 are collected in TIES for the correction factor.
1261 for (int j = 0; j < values.size(); j++) {
1262 treatments.insert(values[j].name);
1264 ties.push_back(&(values[j]));
1266 if (j != values.size()-1) { // you are not the last so you can look ahead
1267 if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue
1268 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1269 for (int k = 0; k < ties.size(); k++) {
1270 double thisrank = rankTotal / (double) ties.size();
1271 (*ties[k]).score = thisrank;
1276 }else { // you are the last one
1277 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1278 for (int k = 0; k < ties.size(); k++) {
1279 double thisrank = rankTotal / (double) ties.size();
1280 (*ties[k]).score = thisrank;
1286 // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1)
1287 map<string, double> sums;
1288 map<string, double> counts;
1289 for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; }
1291 for (int j = 0; j < values.size(); j++) {
1292 sums[values[j].name] += values[j].score;
1293 counts[values[j].name]+= 1.0;
1296 double middleTerm = 0.0;
1297 for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) {
1298 middleTerm += ((sums[*it]*sums[*it])/counts[*it]);
1301 double firstTerm = 12 / (double) (values.size()*(values.size()+1));
1302 double lastTerm = 3 * (values.size()+1);
1304 H = firstTerm * middleTerm - lastTerm;
// Tie correction: divide H by 1 - sum(t^3 - t)/(N^3 - N). The division itself is on a missing line.
1307 if (TIES.size() != 0) {
1309 for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
1310 double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size()));
1314 if (isnan(H) || isinf(H)) { H = 0; }
1316 //Numerical Recipes pg221
// p = Q(df/2, H/2) = 1 - P(df/2, H/2), the chi-square upper tail with df = treatments-1.
1317 pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
1321 catch(exception& e) {
1322 m->errorOut(e, "LinearAlgebra", "calcKruskalWallis");
1326 /*********************************************************************************************************************************/
// normalvariate: Box-Muller (cosine form) normal deviate from two rand()-based uniforms;
// 8*atan(1) = 2*pi. The +1 offsets keep u1, u2 strictly positive so log(u1) is finite.
// NOTE(review): in the visible lines the return ignores `mean` and `standardDeviation`
// (i.e. it returns a standard normal). Confirm against the full source whether scaling
// happens on a missing line, otherwise the parameters are dead.
1327 double LinearAlgebra::normalvariate(double mean, double standardDeviation) {
1329 double u1 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0);
1330 double u2 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0);
1331 //double r = sqrt( -2.0*log(u1) );
1332 //double theta = 2.0*PI*u2;
1333 //cout << cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)) << endl;
1334 return cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1));
1336 catch(exception& e) {
1337 m->errorOut(e, "LinearAlgebra", "normalvariate");
1341 /*********************************************************************************************************************************/
1342 //thanks http://www.johndcook.com/cpp_phi.html
// pnorm: standard normal CDF Phi(x) via the Abramowitz & Stegun 7.1.26 polynomial
// approximation of erf (|error| < 1.5e-7). Credited to johndcook.com's cpp_phi.
1343 double LinearAlgebra::pnorm(double x){
// A&S 7.1.26 coefficients.
1346 double a1 = 0.254829592;
1347 double a2 = -0.284496736;
1348 double a3 = 1.421413741;
1349 double a4 = -1.453152027;
1350 double a5 = 1.061405429;
1351 double p = 0.3275911;
1353 // Save the sign of x
// Work with |x|/sqrt(2) so the erf approximation applies; sign restored at the end.
1357 x = fabs(x)/sqrt(2.0);
1359 // A&S formula 7.1.26
1360 double t = 1.0/(1.0 + p*x);
1361 double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x);
// Phi(x) = (1 + sign(x)*erf(|x|/sqrt(2)))/2.
1363 return 0.5*(1.0 + sign*y);
1366 catch(exception& e) {
1367 m->errorOut(e, "LinearAlgebra", "pnorm");
1372 /*********************************************************************************************************************************/
// calcWilcoxon: two-sample Wilcoxon rank-sum (Mann-Whitney) test, ported from R's
// wilcox.test. Returns the statistic W; sig receives the two-sided p-value — exact when
// both samples are < 50 with no ties, otherwise the normal approximation with continuity
// correction and a tie-adjusted variance.
1373 double LinearAlgebra::calcWilcoxon(vector<double>& x, vector<double>& y, double& sig){
// Pool both samples, tagging each value with its source ("x" or "y").
1378 vector<spearmanRank> ranks;
1379 for (int i = 0; i < x.size(); i++) {
1380 if (m->control_pressed) { return W; }
1381 spearmanRank member("x", x[i]);
1382 ranks.push_back(member);
1385 for (int i = 0; i < y.size(); i++) {
1386 if (m->control_pressed) { return W; }
1387 spearmanRank member("y", y[i]);
1388 ranks.push_back(member);
1392 sort(ranks.begin(), ranks.end(), compareSpearman);
1394 //convert scores to ranks of x
// Average-rank assignment; sizes of tie groups > 1 are saved in TIES for the variance correction.
1395 vector<spearmanRank*> ties;
1398 for (int j = 0; j < ranks.size(); j++) {
1399 if (m->control_pressed) { return W; }
1401 ties.push_back(&(ranks[j]));
1403 if (j != ranks.size()-1) { // you are not the last so you can look ahead
1404 if (ranks[j].score != ranks[j+1].score) { // you are done with ties, rank them and continue
1405 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1406 for (int k = 0; k < ties.size(); k++) {
1407 float thisrank = rankTotal / (float) ties.size();
1408 (*ties[k]).score = thisrank;
1413 }else { // you are the last one
1414 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1415 for (int k = 0; k < ties.size(); k++) {
1416 float thisrank = rankTotal / (float) ties.size();
1417 (*ties[k]).score = thisrank;
1422 //from R wilcox.test function
1423 //STATISTIC <- sum(r[seq_along(x)]) - n.x * (n.x + 1)/2
1424 double sumRanks = 0.0;
1425 for (int i = 0; i < ranks.size(); i++) {
1426 if (m->control_pressed) { return W; }
1427 if (ranks[i].name == "x") { sumRanks += ranks[i].score; }
// W = (rank sum of x) - n_x(n_x+1)/2, i.e. the Mann-Whitney U for sample x.
1430 W = sumRanks - x.size() * ((double)(x.size() + 1)) / 2.0;
1432 //exact <- (n.x < 50) && (n.y < 50)
1433 bool findExact = false;
1434 if ((x.size() < 50) && (y.size() < 50)) { findExact = true; }
1437 if (findExact && (TIES.size() == 0)) { //find exact and no ties
1438 //PVAL <- switch(alternative, two.sided = {
1439 //p <- if (STATISTIC > (n.x * n.y/2))
// Exact two-sided p from the Wilcoxon distribution (upper or lower tail, whichever is smaller).
1442 if (W > ((double)x.size()*y.size()/2.0)) {
1443 //pwilcox(STATISTIC-1, n.x, n.y, lower.tail = FALSE)
1444 pval = wilcox.pwilcox(W-1, x.size(), y.size(), false);
1446 //pwilcox(STATISTIC,n.x, n.y)
1447 pval = wilcox.pwilcox(W, x.size(), y.size(), true);
1450 if (1.0 < sig) { sig = 1.0; }
// Normal approximation branch (ties present or large samples).
1452 //z <- STATISTIC - n.x * n.y/2
1453 double z = W - (double)(x.size() * y.size()/2.0);
1456 for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
1458 //SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) -
1459 //sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y -
1462 double firstTerm = (double)(x.size() * y.size()/12.0);
1463 double secondTerm = (double)(x.size() + y.size() + 1) - sum / (double)((x.size() + y.size()) * (x.size() + y.size() - 1));
1464 sigma = sqrt(firstTerm * secondTerm);
1466 //CORRECTION <- switch(alternative, two.sided = sign(z) * 0.5, greater = 0.5, less = -0.5)
// NOTE(review): R's two-sided correction is sign(z)*0.5; here CORRECTION is set to +/-1.0.
// The *0.5 scaling may be on a missing line — confirm against the full source.
1467 double CORRECTION = 0.0;
1468 if (z < 0) { CORRECTION = -1.0; }
1469 else if (z > 0) { CORRECTION = 1.0; }
1472 z = (z - CORRECTION)/sigma;
1474 //PVAL <- switch(alternative, two.sided = 2 * min(pnorm(z), pnorm(z, lower.tail = FALSE)))
1476 if ((1.0-sig) < sig) { sig = 1.0 - sig; }
1482 catch(exception& e) {
1483 m->errorOut(e, "LinearAlgebra", "calcWilcoxon");
1488 /*********************************************************************************************************************************/
// Binomial coefficient C(n, k), computed in log space via gammln to avoid
// overflow, then exponentiated and rounded to the nearest integer with
// floor(x + 0.5) (the result of exp() is not exact for large arguments).
1489 double LinearAlgebra::choose(double n, double k){
1494 double lchoose = gammln(n + 1.0) - gammln(k + 1.0) - gammln(n - k + 1.0);
1496 return (floor(exp(lchoose) + 0.5));
1498 catch(exception& e) {
1499 m->errorOut(e, "LinearAlgebra", "choose");
1503 /*********************************************************************************************************************************/
// Spearman rank correlation between paired vectors x and y, with tie
// correction (the Lx/Ly terms subtract (t^3 - t)/12 per tie group).
// sig (out-param) receives the significance from calcSpearmanSig; returns
// the correlation p. Requires x.size() == y.size().
// NOTE(review): listing is truncated (source line numbers skip), so some
// declarations (Lx, Ly, di, p, rankTotal, else-branches) are not visible here.
1504 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
1506 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
1509 double sf = 0.0; //f^3 - f where f is the number of ties in x;
1510 double sg = 0.0; //f^3 - f where f is the number of ties in y;
// tableX counts occurrences of each x value to size the tie groups.
1511 map<float, int> tableX;
1512 map<float, int>::iterator itTable;
1513 vector<spearmanRank> xscores;
1515 for (int i = 0; i < x.size(); i++) {
// Name each score by its index so ranks can be matched back to positions.
1516 spearmanRank member(toString(i), x[i]);
1517 xscores.push_back(member);
1519 //count number of repeats
1520 itTable = tableX.find(x[i]);
1521 if (itTable == tableX.end()) {
// Tie correction for x: Lx = sum over groups of (t^3 - t)/12.
1531 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
1532 double tx = (double) itTable->second;
1533 Lx += ((pow(tx, 3.0) - tx) / 12.0);
1538 sort(xscores.begin(), xscores.end(), compareSpearman);
1540 //convert scores to ranks of x
1542 map<string, float> rankx;
1543 vector<spearmanRank> xties;
1545 for (int j = 0; j < xscores.size(); j++) {
1547 xties.push_back(xscores[j]);
1549 if (j != xscores.size()-1) { // you are not the last so you can look ahead
1550 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
1551 for (int k = 0; k < xties.size(); k++) {
// Tied values share the average of the ranks they span (mid-rank method).
1552 float thisrank = rankTotal / (float) xties.size();
1553 rankx[xties[k].name] = thisrank;
1555 int t = xties.size();
1560 }else { // you are the last one
1561 for (int k = 0; k < xties.size(); k++) {
1562 float thisrank = rankTotal / (float) xties.size();
1563 rankx[xties[k].name] = thisrank;
// Repeat the same counting/ranking procedure for y.
1569 vector<spearmanRank> yscores;
1570 map<float, int> tableY;
1571 for (int j = 0; j < y.size(); j++) {
1572 spearmanRank member(toString(j), y[j]);
1573 yscores.push_back(member);
1575 itTable = tableY.find(member.score);
1576 if (itTable == tableY.end()) {
1577 tableY[member.score] = 1;
1579 tableY[member.score]++;
1586 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
1587 double ty = (double) itTable->second;
1588 Ly += ((pow(ty, 3.0) - ty) / 12.0);
1591 sort(yscores.begin(), yscores.end(), compareSpearman);
1594 map<string, float> rank;
1595 vector<spearmanRank> yties;
1597 for (int j = 0; j < yscores.size(); j++) {
1599 yties.push_back(yscores[j]);
1601 if (j != yscores.size()-1) { // you are not the last so you can look ahead
1602 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
1603 for (int k = 0; k < yties.size(); k++) {
1604 float thisrank = rankTotal / (float) yties.size();
1605 rank[yties[k].name] = thisrank;
1607 int t = yties.size();
1612 }else { // you are the last one
1613 for (int k = 0; k < yties.size(); k++) {
1614 float thisrank = rankTotal / (float) yties.size();
1615 rank[yties[k].name] = thisrank;
// di accumulates the sum of squared rank differences D.
1621 for (int k = 0; k < x.size(); k++) {
1623 float xi = rankx[toString(k)];
1624 float yi = rank[toString(k)];
1626 di += ((xi - yi) * (xi - yi));
1631 double n = (double) x.size();
// Tie-corrected rank variances; p is the tie-corrected Spearman formula.
1632 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
1633 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
1635 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
1637 //Numerical Recipes 646
1638 sig = calcSpearmanSig(n, sf, sg, di);
1642 catch(exception& e) {
1643 m->errorOut(e, "LinearAlgebra", "calcSpearman");
1647 /*********************************************************************************************************************************/
// Significance of a Spearman correlation (Numerical Recipes "spear", pg. 646).
// n = sample size, sf/sg = sum of (t^3 - t) over tie groups in x and y,
// d = sum of squared rank differences. Computes two p-values — probd from the
// exact-D normal approximation (via erfcc) and probrs from Student's t on the
// tie-corrected rs (via betai) — and returns/sets the smaller as sig.
// NOTE(review): listing is truncated; declarations of en, df, sig and some
// closing braces are not visible here.
1648 double LinearAlgebra::calcSpearmanSig(double n, double sf, double sg, double d){
1652 double probrs = 0.0;
1654 double en3n=en*en*en-en;
// aved/vard: expected value and variance of D under the null, tie-corrected.
1655 double aved=en3n/6.0-(sf+sg)/12.0;
1656 double fac=(1.0-sf/en3n)*(1.0-sg/en3n);
1657 double vard=((en-1.0)*en*en*SQR(en+1.0)/36.0)*fac;
1658 double zd=(d-aved)/sqrt(vard);
// 1.4142136 = sqrt(2): converts z-score to a two-sided p via erfc(|z|/sqrt(2)).
1659 double probd=erfcc(fabs(zd)/1.4142136);
// rs: tie-corrected Spearman coefficient recomputed from D.
1660 double rs=(1.0-(6.0/en3n)*(d+(sf+sg)/12.0))/sqrt(fac);
1661 fac=(rs+1.0)*(1.0-rs);
// t-statistic with df degrees of freedom; betai gives the two-sided p-value.
1663 double t=rs*sqrt((en-2.0)/fac);
1665 probrs=betai(0.5*df,0.5,df/(df+t*t));
1670 //smaller of probd and probrs is sig
1672 if (probd < probrs) { sig = probd; }
// Guard against NaN/Inf from degenerate inputs (e.g. perfect correlation).
1674 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
1678 catch(exception& e) {
1679 m->errorOut(e, "LinearAlgebra", "calcSpearmanSig");
1683 /*********************************************************************************************************************************/
// Pearson product-moment correlation r between paired vectors x and y.
// sig (out-param) receives the significance from calcPearsonSig; returns r.
// Requires x.size() == y.size().
// NOTE(review): means are accumulated in float while the inputs are double —
// possible precision loss; flagged only, not changed (listing is truncated and
// declarations of sumY, Xi, Yi, r are not visible here).
1684 double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double& sig){
1686 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
// Mean of x.
1689 float averageX = 0.0;
1690 for (int i = 0; i < x.size(); i++) { averageX += x[i]; }
1691 averageX = averageX / (float) x.size();
// Mean of y.
1695 for (int j = 0; j < y.size(); j++) { sumY += y[j]; }
1696 float Ybar = sumY / (float) y.size();
// r = sum((Xi - Xbar)(Yi - Ybar)) / sqrt(sum((Xi - Xbar)^2) * sum((Yi - Ybar)^2))
1699 double numerator = 0.0;
1700 double denomTerm1 = 0.0;
1701 double denomTerm2 = 0.0;
1703 for (int j = 0; j < x.size(); j++) {
1707 numerator += ((Xi - averageX) * (Yi - Ybar));
1708 denomTerm1 += ((Xi - averageX) * (Xi - averageX));
1709 denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
1712 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
// NOTE(review): no guard against denom == 0 (constant input vector) visible here.
1714 r = numerator / denom;
1716 //Numerical Recipes pg.644
1717 sig = calcPearsonSig(x.size(), r);
1721 catch(exception& e) {
1722 m->errorOut(e, "LinearAlgebra", "calcPearson");
1726 /*********************************************************************************************************************************/
// Significance of a Pearson correlation r for sample size n (Numerical
// Recipes pg. 644): Fisher z-transform of r, then a two-sided p-value via
// erfc(|z*sqrt(n-1)|/sqrt(2)). TINY keeps the log argument finite at r = +/-1.
1727 double LinearAlgebra::calcPearsonSig(double n, double r){
1731 const double TINY = 1.0e-20;
1732 double z = 0.5*log((1.0+r+TINY)/(1.0-r+TINY)); //Fisher's z transformation
1734 //code below was giving an error in betacf with sop files
1736 //double t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)));
1737 //sig = betai(0.5+df, 0.5, df/(df+t*t));
1739 //Numerical Recipes says code below gives approximately the same result
// 1.4142136 = sqrt(2).
1740 sig = erfcc(fabs(z*sqrt(n-1.0))/1.4142136);
// Guard against NaN/Inf from degenerate inputs.
1741 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
1745 catch(exception& e) {
1746 m->errorOut(e, "LinearAlgebra", "calcPearsonSig");
1750 /*********************************************************************************************************************************/
// Pairwise Euclidean distance matrix between samples (rows of relAbundData,
// each row a vector over OTUs). Returns a symmetric numSamples x numSamples
// matrix with zero diagonal. Assumes relAbundData is non-empty and rectangular.
1752 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
1755 int numSamples = relAbundData.size();
1756 int numOTUs = relAbundData[0].size();
1758 vector<vector<double> > dMatrix(numSamples);
1759 for(int i=0;i<numSamples;i++){
1760 dMatrix[i].resize(numSamples);
// NOTE(review): j loops over all samples, so each (i,j) pair is computed
// twice even though the mirror assignment below would let j start at i.
1763 for(int i=0;i<numSamples;i++){
1764 for(int j=0;j<numSamples;j++){
1766 if (m->control_pressed) { return dMatrix; }
// d = sum of squared per-OTU differences; distance is its square root.
1769 for(int k=0;k<numOTUs;k++){
1770 d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
1772 dMatrix[i][j] = pow(d, 0.50000);
1773 dMatrix[j][i] = dMatrix[i][j];
1779 catch(exception& e) {
1780 m->errorOut(e, "LinearAlgebra", "getObservedEuclideanDistance");
1785 /*********************************************************************************************************************************/
// Solves the linear system A * x = b via LU decomposition (ludcmp) followed
// by back-substitution (lubksb). A and b are taken by value: A is overwritten
// with its LU factors and b with the solution, which is what gets returned.
// Returns the (unsolved) b if the user cancels during ludcmp.
1786 vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<double> b){
1788 int length = (int)b.size();
// NOTE(review): local x is never used in the visible lines — likely dead code.
1789 vector<double> x(length, 0);
// index records the row permutation chosen by ludcmp's partial pivoting.
1790 vector<int> index(length);
1791 for(int i=0;i<length;i++){ index[i] = i; }
1794 ludcmp(A, index, d); if (m->control_pressed) { return b; }
1795 lubksb(A, index, b);
1799 catch(exception& e) {
1800 m->errorOut(e, "LinearAlgebra", "solveEquations");
1804 /*********************************************************************************************************************************/
// Float overload of solveEquations: solves A * x = b via LU decomposition
// (ludcmp) and back-substitution (lubksb). A and b are taken by value: A is
// overwritten with its LU factors and b with the solution, which is returned.
// Returns the (unsolved) b if the user cancels during ludcmp.
1805 vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
1807 int length = (int)b.size();
// NOTE(review): local x is vector<double> in this float overload and is never
// used in the visible lines — likely dead code copied from the double version.
1808 vector<double> x(length, 0);
// index records the row permutation chosen by ludcmp's partial pivoting.
1809 vector<int> index(length);
1810 for(int i=0;i<length;i++){ index[i] = i; }
1813 ludcmp(A, index, d); if (m->control_pressed) { return b; }
1814 lubksb(A, index, b);
1818 catch(exception& e) {
1819 m->errorOut(e, "LinearAlgebra", "solveEquations");
1824 /*********************************************************************************************************************************/
// LU decomposition with partial pivoting (Crout's method, Numerical Recipes
// "ludcmp"). Decomposes A in place into L and U factors; index records the
// row permutation, d tracks the sign of the permutation (+/-1) for use in
// determinant computation. tiny replaces exact-zero pivots so the later
// division cannot fault; a fully zero row is only warned about, not aborted.
// NOTE(review): listing is truncated — declarations of big/temp/imax, the
// implicit-scaling stores into vv, and several closing braces are not visible.
1826 void LinearAlgebra::ludcmp(vector<vector<double> >& A, vector<int>& index, double& d){
1828 double tiny = 1e-20;
1830 int n = (int)A.size();
// vv holds the implicit per-row scaling used to pick pivots.
1831 vector<double> vv(n, 0.0);
// Find each row's largest element to establish its scale.
1837 for(int i=0;i<n;i++){
1839 for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
1840 if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
// Crout's loop over columns.
1844 for(int j=0;j<n;j++){
1845 if (m->control_pressed) { break; }
// Compute the elements of U above the diagonal.
1846 for(int i=0;i<j;i++){
1847 double sum = A[i][j];
1848 for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
// Compute the remaining column elements and search for the best pivot.
1853 for(int i=j;i<n;i++){
1854 double sum = A[i][j];
1855 for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
// Scaled pivot candidate: largest vv[i]*|sum| wins.
1858 if((dum = vv[i] * fabs(sum)) >= big){
// Swap rows j and imax when a better pivot was found elsewhere.
1864 for(int k=0;k<n;k++){
1865 double dum = A[imax][k];
1866 A[imax][k] = A[j][k];
// Replace a zero pivot with tiny so the division below is safe.
1874 if(A[j][j] == 0.0){ A[j][j] = tiny; }
// Divide the sub-column by the pivot to finish this column of L.
1877 double dum = 1.0/A[j][j];
1878 for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
1882 catch(exception& e) {
1883 m->errorOut(e, "LinearAlgebra", "ludcmp");
1888 /*********************************************************************************************************************************/
// Back-substitution companion to ludcmp (Numerical Recipes "lubksb").
// A must hold the LU factors and index the row permutation produced by
// ludcmp; b is the right-hand side on entry and the solution x on exit.
// First pass: forward substitution through L (unpermuting b via index as it
// goes; ii tracks the first nonzero element to skip leading zeros). Second
// pass: backward substitution through U.
// NOTE(review): listing is truncated — the index-based unscramble of b and
// some closing braces are not visible here.
1890 void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vector<double>& b){
1893 int n = (int)A.size();
// Forward substitution (solve L * y = permuted b).
1896 for(int i=0;i<n;i++){
1897 if (m->control_pressed) { break; }
1903 for(int j=ii-1;j<i;j++){
1904 total -= A[i][j] * b[j];
// Once a nonzero element is seen, start the inner sum from there.
1907 else if(total != 0){ ii = i+1; }
// Backward substitution (solve U * x = y).
1910 for(int i=n-1;i>=0;i--){
1912 for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
1913 b[i] = total / A[i][i];
1916 catch(exception& e) {
1917 m->errorOut(e, "LinearAlgebra", "lubksb");
1921 /*********************************************************************************************************************************/
// Float overload of ludcmp: LU decomposition with partial pivoting (Crout's
// method, Numerical Recipes). Mirrors the double version above: A is
// decomposed in place, index records the row permutation, d tracks the
// permutation sign. tiny replaces exact-zero pivots; a fully zero row is
// warned about but not aborted.
// NOTE(review): listing is truncated — declarations of big/temp/imax, the
// stores into vv, and several closing braces are not visible here.
1923 void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
1925 double tiny = 1e-20;
1927 int n = (int)A.size();
// vv holds the implicit per-row scaling used to pick pivots.
1928 vector<float> vv(n, 0.0);
// Find each row's largest element to establish its scale.
1934 for(int i=0;i<n;i++){
1936 for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
1937 if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
// Crout's loop over columns.
1941 for(int j=0;j<n;j++){
1942 if (m->control_pressed) { break; }
// Compute the elements of U above the diagonal.
1943 for(int i=0;i<j;i++){
1944 float sum = A[i][j];
1945 for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
// Compute remaining column elements and search for the best scaled pivot.
1950 for(int i=j;i<n;i++){
1951 float sum = A[i][j];
1952 for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
1955 if((dum = vv[i] * fabs(sum)) >= big){
// Swap rows j and imax when a better pivot was found elsewhere.
1961 for(int k=0;k<n;k++){
1962 float dum = A[imax][k];
1963 A[imax][k] = A[j][k];
// Replace a zero pivot with tiny so the division below is safe.
1971 if(A[j][j] == 0.0){ A[j][j] = tiny; }
// Divide the sub-column by the pivot to finish this column of L.
1974 float dum = 1.0/A[j][j];
1975 for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
1979 catch(exception& e) {
1980 m->errorOut(e, "LinearAlgebra", "ludcmp");
1985 /*********************************************************************************************************************************/
// Float overload of lubksb: back-substitution companion to the float ludcmp.
// A must hold LU factors and index the row permutation from ludcmp; b is the
// right-hand side on entry and the solution on exit. Forward substitution
// through L (ii skips leading zeros), then backward substitution through U.
// NOTE(review): listing is truncated — the index-based unscramble of b and
// some closing braces are not visible here.
1987 void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
1990 int n = (int)A.size();
// Forward substitution (solve L * y = permuted b).
1993 for(int i=0;i<n;i++){
1994 if (m->control_pressed) { break; }
2000 for(int j=ii-1;j<i;j++){
2001 total -= A[i][j] * b[j];
// Once a nonzero element is seen, start the inner sum from there.
2004 else if(total != 0){ ii = i+1; }
// Backward substitution (solve U * x = y).
2007 for(int i=n-1;i>=0;i--){
2009 for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
2010 b[i] = total / A[i][i];
2013 catch(exception& e) {
2014 m->errorOut(e, "LinearAlgebra", "lubksb");
2019 /*********************************************************************************************************************************/
// Matrix inverse via LU decomposition: factor the matrix once with ludcmp,
// then solve A * col_j = e_j (a unit vector) for each column j with lubksb,
// assembling the solutions into the inverse column by column (standard
// Numerical Recipes approach). matrix is taken by value because ludcmp
// overwrites it with the LU factors.
2021 vector<vector<double> > LinearAlgebra::getInverse(vector<vector<double> > matrix){
2023 int n = (int)matrix.size();
2025 vector<vector<double> > inverse(n);
2026 for(int i=0;i<n;i++){ inverse[i].assign(n, 0.0000); }
2028 vector<double> column(n, 0.0000);
2029 vector<int> index(n, 0);
2032 ludcmp(matrix, index, dummy);
// Solve for one column of the inverse per iteration.
2034 for(int j=0;j<n;j++){
2035 if (m->control_pressed) { break; }
// Reset column to the j-th unit vector (the 1 is set on a line not visible here).
2037 column.assign(n, 0);
2041 lubksb(matrix, index, column);
2043 for(int i=0;i<n;i++){ inverse[i][j] = column[i]; }
2048 catch(exception& e) {
2049 m->errorOut(e, "LinearAlgebra", "getInverse");
2053 /*********************************************************************************************************************************/
2054 //modelled R lda function - MASS:::lda.default
// Linear discriminant analysis, modeled on R's MASS:::lda.default (the R
// source lines are quoted throughout as // comments). Input a is laid out
// [numOTUs][numSampled]; groups gives the class label of each sample.
// Outputs: means (out-param) receives the per-class per-OTU means
// ([numGroups][numOTUs]); ignore is set true when a rank check fails
// (variables numerically constant, or class means numerically identical);
// returns the final scaling (discriminant coefficient) matrix.
// NOTE(review): listing is truncated (source line numbers skip), so some
// declarations (count, d, rank, the early-return on rank==0) and closing
// braces are not visible here; comments describe only the visible lines.
2055 vector< vector<double> > LinearAlgebra::lda(vector< vector<double> >& a, vector<string> groups, vector< vector<double> >& means, bool& ignore) {
// Map each distinct class name to a dense index 0..numGroups-1.
2058 set<string> uniqueGroups;
2059 for (int i = 0; i < groups.size(); i++) { uniqueGroups.insert(groups[i]); }
2060 int numGroups = uniqueGroups.size();
2062 map<string, int> quickIndex; //className to index. hoping to save counts, proportions and means in vectors to save time. This map will allow us to know index 0 in counts refers to group1.
2064 for (set<string>::iterator it = uniqueGroups.begin(); it != uniqueGroups.end(); it++) { quickIndex[*it] = count; count++; }
2066 int numSampled = groups.size(); //number of sampled groups
2067 int numOtus = a.size(); //number of flagged bins
2069 //counts <- as.vector(table(g)) //number of samples from each class in random sampling
2070 vector<int> counts; counts.resize(numGroups, 0);
2071 for (int i = 0; i < groups.size(); i++) {
2072 counts[quickIndex[groups[i]]]++;
// Class prior proportions (counts / n).
2075 vector<double> proportions; proportions.resize(numGroups, 0.0);
2076 for (int i = 0; i < numGroups; i++) { proportions[i] = counts[i] / (double) numSampled; }
2078 means.clear(); //means[0] -> means[0][0] average for [group0][OTU0].
2079 means.resize(numGroups); for (int i = 0; i < means.size(); i++) { means[i].resize(numOtus, 0.0); }
2080 for (int j = 0; j < numSampled; j++) { //total for each class for each OTU
2081 for (int i = 0; i < numOtus; i++) { means[quickIndex[groups[j]]][i] += a[i][j]; }
2083 //average for each class for each OTU
2084 for (int j = 0; j < numGroups; j++) { for (int i = 0; i < numOtus; i++) { means[j][i] /= counts[j]; } }
2086 //randCov <- x - group.means[g, ]
2087 vector< vector<double> > randCov; //randCov[0][0] -> (random sample value0 for OTU0 - average for samples group in OTU0). example OTU0, random sample 0.01 from class early. average of class early for OTU0 is 0.005. randCov[0][0] = (0.01-0.005)
2088 for (int i = 0; i < numOtus; i++) { //for each flagged OTU
2089 vector<double> tempRand;
2090 for (int j = 0; j < numSampled; j++) { tempRand.push_back(a[i][j] - means[quickIndex[groups[j]]][i]); }
2091 randCov.push_back(tempRand);
2094 //find variance and std for each OTU
2095 //f1 <- sqrt(diag(var(x - group.means[g, ])))
2096 vector<double> stdF1;
2098 for (int i = 0; i < numOtus; i++) {
2099 stdF1.push_back(0.0);
2100 ave.push_back(m->getAverage(randCov[i]));
// Sum of squared deviations per OTU; divided by (n-1) below.
2103 for (int i = 0; i < numOtus; i++) {
2104 for (int j = 0; j < numSampled; j++) { stdF1[i] += ((randCov[i][j] - ave[i]) * (randCov[i][j] - ave[i])); }
2108 double fac = 1 / (double) (numSampled-numGroups);
2110 for (int i = 0; i < stdF1.size(); i++) {
2111 stdF1[i] /= (double) (numSampled-1);
2112 stdF1[i] = sqrt(stdF1[i]);
// Initial scaling: diagonal matrix of 1/std per OTU (R's diag(1/f1)).
2115 vector< vector<double> > scaling; //[numOTUS][numOTUS]
2116 for (int i = 0; i < numOtus; i++) {
2117 vector<double> temp;
2118 for (int j = 0; j < numOtus; j++) {
2119 if (i == j) { temp.push_back(1.0/stdF1[i]); }
2120 else { temp.push_back(0.0); }
2123 scaling.push_back(temp);
// Debug dump of the scaling matrix.
2126 cout << "scaling = " << endl;
2127 for (int i = 0; i < scaling.size(); i++) {
2128 for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
2132 //X <- sqrt(fac) * ((x - group.means[g, ]) %*% scaling)
2133 vector< vector<double> > X = randCov; //[numOTUS][numSampled]
2134 //((x - group.means[g, ]) %*% scaling)
2135 //matrix multiplication of randCov and scaling
2136 LinearAlgebra linear;
2137 X = linear.matrix_mult(scaling, randCov); //[numOTUS][numOTUS] * [numOTUS][numSampled] = [numOTUS][numSampled]
// NOTE(review): R multiplies by sqrt(fac); this scales by fac directly — confirm intended.
2140 for (int i = 0; i < X.size(); i++) {
2141 for (int j = 0; j < X[i].size(); j++) { X[i][j] *= fac; }
// First SVD (of the within-class scatter) to find the effective rank.
2145 vector< vector<double> > v;
2146 vector< vector<double> > Xcopy; //X = [numOTUS][numSampled]
2147 bool transpose = false; //svd requires rows < columns, so if they are not then I need to transpose and look for the results in v.
2148 if (X.size() < X[0].size()) { Xcopy = linear.transpose(X); transpose=true; }
2150 linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below, because R's version is [numSampled][numOTUS]
2152 /*cout << "Xcopy = " << endl;
2153 for (int i = 0; i < Xcopy.size(); i++) {
2154 for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
2157 cout << "v = " << endl;
2158 for (int i = 0; i < v.size(); i++) {
2159 for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; }
// Keep singular values above tolerance; their columns form the usable subspace.
2165 set<int> goodColumns;
2166 //cout << "d = " << endl;
2167 for (int i = 0; i < d.size(); i++) { if (d[i] > 0.0000000001) { rank++; goodColumns.insert(i); } } //cout << d[i] << endl;
2170 ignore=true; //m->mothurOut("[ERROR]: rank = 0: variables are numerically const\n"); m->control_pressed = true;
2173 //scaling <- scaling %*% X.s$v[, 1L:rank] %*% diag(1/X.s$d[1L:rank], , rank)
2174 //X.s$v[, 1L:rank] = columns in Xcopy that correspond to "good" d values
2175 //diag(1/X.s$d[1L:rank], , rank) = matrix size rank * rank where the diagonal is 1/"good" dvalues
// Worked example from R kept for reference:
2178 [1] 3.721545e+00 3.034607e+00 2.296649e+00 7.986927e-16 6.922408e-16
2182 [,1] [,2] [,3] [,4] [,5] [,6]
2183 [1,] 0.31122175 0.10944725 0.20183340 -0.30136820 0.60786235 -0.13537095
2184 [2,] -0.29563726 -0.20568893 0.11233366 -0.05073289 0.48234270 0.21965978
2187 [1] "X.s$v[, 1L:rank]"
2189 [1,] 0.31122175 0.10944725 0.20183340
2190 [2,] -0.29563726 -0.20568893 0.11233366
2192 [1] "1/X.s$d[1L:rank]"
2193 [1] 0.2687056 0.3295320 0.4354170
2195 [1] "diag(1/X.s$d[1L:rank], , rank)"
2197 [1,] 0.2687056 0.000000 0.000000
2198 [2,] 0.0000000 0.329532 0.000000
2199 [3,] 0.0000000 0.000000 0.435417
2202 Xcopy = linear.transpose(v);
2204 cout << "Xcopy = " << endl;
2205 for (int i = 0; i < Xcopy.size(); i++) {
2206 for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
2210 v.clear(); //store "good" columns - X.s$v[, 1L:rank]
2211 v.resize(Xcopy.size()); //[numOTUS]["good" columns]
2212 for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {
2213 for (int i = 0; i < Xcopy.size(); i++) {
2214 v[i].push_back(Xcopy[i][*it]);
// Diagonal matrix of 1/d over the "good" singular values.
2218 vector< vector<double> > diagRanks; diagRanks.resize(rank);
2219 for (int i = 0; i < rank; i++) { diagRanks[i].resize(rank, 0.0); }
2221 for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { diagRanks[count][count] = 1.0 / d[*it]; count++; }
2223 scaling = linear.matrix_mult(linear.matrix_mult(scaling, v), diagRanks); //([numOTUS][numOTUS]*[numOTUS]["good" columns]) = [numOTUS]["good" columns] then ([numOTUS]["good" columns] * ["good" columns]["good" columns] = scaling = [numOTUS]["good" columns]
2225 /*cout << "scaling = " << endl;
2226 for (int i = 0; i < scaling.size(); i++) {
2227 for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
2231 //Note: linear.matrix_mult [1][numGroups] * [numGroups][numOTUs] - columns in first must match rows in second, returns matrix[1][numOTUs]
// Overall (prior-weighted) mean per OTU: xbar <- colSums(prior * group.means).
2232 vector< vector<double> > prior; prior.push_back(proportions);
2233 vector< vector<double> > xbar = linear.matrix_mult(prior, means);
2234 vector<double> xBar = xbar[0]; //length numOTUs
2236 /*cout << "xbar" << endl;
2237 for (int j = 0; j < numOtus; j++) { cout << xBar[j] <<'\t'; }cout << endl;*/
2239 fac = 1 / (double) (numGroups-1);
2240 //scale(group.means, center = xbar, scale = FALSE) %*% scaling
// Center the class means on xbar, then project into the scaled space.
2241 vector< vector<double> > scaledMeans = means; //[numGroups][numOTUs]
2242 for (int i = 0; i < numGroups; i++) {
2243 for (int j = 0; j < numOtus; j++) { scaledMeans[i][j] -= xBar[j]; }
2245 scaledMeans = linear.matrix_mult(scaledMeans, scaling); //[numGroups][numOTUS]*[numOTUS]["good"columns] = [numGroups]["good"columns]
2248 //sqrt((n * prior) * fac)
2249 vector<double> temp = proportions; //[numGroups]
2250 for (int i = 0; i < temp.size(); i++) { temp[i] *= numSampled * fac; temp[i] = sqrt(temp[i]); }
2252 //X <- sqrt((n * prior) * fac) * (scale(group.means, center = xbar, scale = FALSE) %*% scaling)
2253 //X <- temp * scaledMeans
2254 X.clear(); X = scaledMeans; //[numGroups]["good"columns]
// NOTE(review): temp is indexed by group in R (row-wise weight) but by j
// (column) here — confirm the intended orientation.
2255 for (int i = 0; i < X.size(); i++) {
2256 for (int j = 0; j < X[i].size(); j++) { X[i][j] *= temp[j]; }
2259 cout << "X = " << endl;
2260 for (int i = 0; i < X.size(); i++) {
2261 for (int j = 0; j < X[i].size(); j++) { cout << X[i][j] << '\t'; }
// Second SVD (of the between-class scatter) for the final discriminant axes.
2266 d.clear(); v.clear();
2267 //we want to transpose so results are in Xcopy, but if that makes rows > columns then we don't since svd requires rows < cols.
2269 if (X.size() > X[0].size()) { Xcopy = X; transpose=true; }
2270 else { Xcopy = linear.transpose(X); }
2271 linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below
2272 /*cout << "Xcopy = " << endl;
2273 for (int i = 0; i < Xcopy.size(); i++) {
2274 for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
2278 cout << "v = " << endl;
2279 for (int i = 0; i < v.size(); i++) {
2280 for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; }
2284 cout << "d = " << endl;
2285 for (int i = 0; i < d.size(); i++) { cout << d[i] << endl; }*/
2287 //rank <- sum(X.s$d > tol * X.s$d[1L])
2288 //X.s$d[1L] = larger value in d vector
2289 double largeD = m->max(d);
2290 rank = 0; goodColumns.clear();
2291 for (int i = 0; i < d.size(); i++) { if (d[i] > (0.0000000001*largeD)) { rank++; goodColumns.insert(i); } }
2294 ignore=true;//m->mothurOut("[ERROR]: rank = 0: class means are numerically identical.\n"); m->control_pressed = true;
2297 if (transpose) { Xcopy = linear.transpose(v); }
2298 //scaling <- scaling %*% X.s$v[, 1L:rank] - scaling * "good" columns
2299 v.clear(); //store "good" columns - X.s$v[, 1L:rank]
2300 v.resize(Xcopy.size()); //Xcopy = ["good"columns][numGroups]
2301 for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {
2302 for (int i = 0; i < Xcopy.size(); i++) {
2303 v[i].push_back(Xcopy[i][*it]);
// Final projection: restrict scaling to the surviving discriminant columns.
2307 scaling = linear.matrix_mult(scaling, v); //[numOTUS]["good" columns] * ["good"columns][new "good" columns]
2309 /*cout << "scaling = " << endl;
2310 for (int i = 0; i < scaling.size(); i++) {
2311 for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
2317 catch(exception& e) {
2318 m->errorOut(e, "LinearAlgebra", "lda");
2322 /*********************************************************************************************************************************/
2323 //Singular value decomposition (SVD) - adapted from http://svn.lirec.eu/libs/magicsquares/src/SVD.cpp
2325 * svdcomp - SVD decomposition routine.
2326 * Takes an mxn matrix a and decomposes it into udv, where u,v are
2327 * left and right orthogonal transformation matrices, and d is a
2328 * diagonal matrix of singular values.
2330 * This routine is adapted from svdecomp.c in XLISP-STAT 2.1 which is
2331 * code from Numerical Recipes adapted by Luke Tierney and David Betz.
2333 * Input to dsvd is as follows:
2334 * a = mxn matrix to be decomposed, gets overwritten with u
2335 * m = row dimension of a
2336 * n = column dimension of a
2337 * w = returns the vector of singular values of a
2338 * v = returns the right orthogonal transformation matrix
2341 int LinearAlgebra::svd(vector< vector<double> >& a, vector<double>& w, vector< vector<double> >& v) {
2343 int flag, i, its, j, jj, k, l, nm;
2344 double c, f, h, s, x, y, z;
2345 double anorm = 0.0, g = 0.0, scale = 0.0;
2347 int numRows = a.size(); if (numRows == 0) { return 0; }
2348 int numCols = a[0].size();
2349 w.resize(numCols, 0.0);
2350 v.resize(numCols); for (int i = 0; i < numCols; i++) { v[i].resize(numRows, 0.0); }
2352 vector<double> rv1; rv1.resize(numCols, 0.0);
2353 if (numRows < numCols){ m->mothurOut("[ERROR]: numRows < numCols\n"); m->control_pressed = true; return 0; }
2355 /* Householder reduction to bidiagonal form */
2356 for (i = 0; i < numCols; i++)
2358 /* left-hand reduction */
2361 g = s = scale = 0.0;
2364 for (k = i; k < numRows; k++)
2365 scale += fabs((double)a[k][i]);
2368 for (k = i; k < numRows; k++)
2370 a[k][i] = (double)((double)a[k][i]/scale);
2371 s += ((double)a[k][i] * (double)a[k][i]);
2373 f = (double)a[i][i];
2374 g = -SIGN(sqrt(s), f);
2376 a[i][i] = (double)(f - g);
2377 if (i != numCols - 1)
2379 for (j = l; j < numCols; j++)
2381 for (s = 0.0, k = i; k < numRows; k++)
2382 s += ((double)a[k][i] * (double)a[k][j]);
2384 for (k = i; k < numRows; k++)
2385 a[k][j] += (double)(f * (double)a[k][i]);
2388 for (k = i; k < numRows; k++)
2389 a[k][i] = (double)((double)a[k][i]*scale);
2392 w[i] = (double)(scale * g);
2394 /* right-hand reduction */
2395 g = s = scale = 0.0;
2396 if (i < numRows && i != numCols - 1)
2398 for (k = l; k < numCols; k++)
2399 scale += fabs((double)a[i][k]);
2402 for (k = l; k < numCols; k++)
2404 a[i][k] = (double)((double)a[i][k]/scale);
2405 s += ((double)a[i][k] * (double)a[i][k]);
2407 f = (double)a[i][l];
2408 g = -SIGN(sqrt(s), f);
2410 a[i][l] = (double)(f - g);
2411 for (k = l; k < numCols; k++)
2412 rv1[k] = (double)a[i][k] / h;
2413 if (i != numRows - 1)
2415 for (j = l; j < numRows; j++)
2417 for (s = 0.0, k = l; k < numCols; k++)
2418 s += ((double)a[j][k] * (double)a[i][k]);
2419 for (k = l; k < numCols; k++)
2420 a[j][k] += (double)(s * rv1[k]);
2423 for (k = l; k < numCols; k++)
2424 a[i][k] = (double)((double)a[i][k]*scale);
2427 anorm = max(anorm, (fabs((double)w[i]) + fabs(rv1[i])));
2430 /* accumulate the right-hand transformation */
2431 for (i = numCols - 1; i >= 0; i--)
2433 if (i < numCols - 1)
2437 for (j = l; j < numCols; j++)
2438 v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g);
2439 /* double division to avoid underflow */
2440 for (j = l; j < numCols; j++)
2442 for (s = 0.0, k = l; k < numCols; k++)
2443 s += ((double)a[i][k] * (double)v[k][j]);
2444 for (k = l; k < numCols; k++)
2445 v[k][j] += (double)(s * (double)v[k][i]);
2448 for (j = l; j < numCols; j++)
2449 v[i][j] = v[j][i] = 0.0;
2456 /* accumulate the left-hand transformation */
2457 for (i = numCols - 1; i >= 0; i--)
2461 if (i < numCols - 1)
2462 for (j = l; j < numCols; j++)
2467 if (i != numCols - 1)
2469 for (j = l; j < numCols; j++)
2471 for (s = 0.0, k = l; k < numRows; k++)
2472 s += ((double)a[k][i] * (double)a[k][j]);
2473 f = (s / (double)a[i][i]) * g;
2474 for (k = i; k < numRows; k++)
2475 a[k][j] += (double)(f * (double)a[k][i]);
2478 for (j = i; j < numRows; j++)
2479 a[j][i] = (double)((double)a[j][i]*g);
2483 for (j = i; j < numRows; j++)
2489 /* diagonalize the bidiagonal form */
2490 for (k = numCols - 1; k >= 0; k--)
2491 { /* loop over singular values */
2492 for (its = 0; its < 30; its++)
2493 { /* loop over allowed iterations */
2495 for (l = k; l >= 0; l--)
2496 { /* test for splitting */
2498 if (fabs(rv1[l]) + anorm == anorm)
2503 if (fabs((double)w[nm]) + anorm == anorm)
2510 for (i = l; i <= k; i++)
2513 if (fabs(f) + anorm != anorm)
2521 for (j = 0; j < numRows; j++)
2523 y = (double)a[j][nm];
2524 z = (double)a[j][i];
2525 a[j][nm] = (double)(y * c + z * s);
2526 a[j][i] = (double)(z * c - y * s);
2535 { /* make singular value nonnegative */
2536 w[k] = (double)(-z);
2537 for (j = 0; j < numCols; j++)
2538 v[j][k] = (-v[j][k]);
2543 m->mothurOut("No convergence after 30,000! iterations \n"); m->control_pressed = true;
2547 /* shift from bottom 2 x 2 minor */
2553 f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
2555 f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x;
2557 /* next QR transformation */
2559 for (j = l; j <= nm; j++)
2574 for (jj = 0; jj < numCols; jj++)
2576 x = (double)v[jj][j];
2577 z = (double)v[jj][i];
2578 v[jj][j] = (float)(x * c + z * s);
2579 v[jj][i] = (float)(z * c - x * s);
2589 f = (c * g) + (s * y);
2590 x = (c * y) - (s * g);
2591 for (jj = 0; jj < numRows; jj++)
2593 y = (double)a[jj][j];
2594 z = (double)a[jj][i];
2595 a[jj][j] = (double)(y * c + z * s);
2596 a[jj][i] = (double)(z * c - y * s);
2608 catch(exception& e) {
2609 m->errorOut(e, "LinearAlgebra", "svd");