5 * Created by westcott on 1/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "linearalgebra.h"
13 // This class references functions used from "Numerical Recipes in C++" //
15 /*********************************************************************************************************************************/
16 inline double SQR(const double a)
20 /*********************************************************************************************************************************/
22 inline double SIGN(const double a, const double b)
24 return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
26 /*********************************************************************************************************************************/
27 //NUmerical recipes pg. 245 - Returns the complementary error function erfc(x) with fractional error everywhere less than 1.2 × 10−7.
28 double LinearAlgebra::erfcc(double x){
34 ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
35 t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
36 t*(-0.82215223+t*0.17087277)))))))));
38 //cout << "in erfcc " << t << '\t' << ans<< endl;
39 return (x >= 0.0 ? ans : 2.0 - ans);
42 m->errorOut(e, "LinearAlgebra", "betai");
46 /*********************************************************************************************************************************/
47 //Numerical Recipes pg. 232
48 double LinearAlgebra::betai(const double a, const double b, const double x) {
53 if (x < 0.0 || x > 1.0) { m->mothurOut("[ERROR]: bad x in betai.\n"); m->control_pressed = true; return 0.0; }
55 if (x == 0.0 || x == 1.0) { bt = 0.0; }
56 else { bt = exp(gammln(a+b)-gammln(a)-gammln(b)+a*log(x)+b*log(1.0-x)); }
58 if (x < (a+1.0) / (a+b+2.0)) { result = bt*betacf(a,b,x)/a; }
59 else { result = 1.0-bt*betacf(b,a,1.0-x)/b; }
64 m->errorOut(e, "LinearAlgebra", "betai");
68 /*********************************************************************************************************************************/
69 //Numerical Recipes pg. 219
70 double LinearAlgebra::gammln(const double xx) {
74 static const double cof[6]={76.18009172947146,-86.50532032941677,24.01409824083091,
75 -1.231739572450155,0.120858003e-2,-0.536382e-5};
79 tmp -= (x+0.5)*log(tmp);
84 return -tmp+log(2.5066282746310005*ser/x);
87 m->errorOut(e, "LinearAlgebra", "gammln");
91 /*********************************************************************************************************************************/
92 //Numerical Recipes pg. 223
93 double LinearAlgebra::gammp(const double a, const double x) {
95 double gamser,gammcf,gln;
97 if (x < 0.0 || a <= 0.0) { m->mothurOut("[ERROR]: Invalid arguments in routine GAMMP\n"); m->control_pressed = true; return 0.0;}
108 catch(exception& e) {
109 m->errorOut(e, "LinearAlgebra", "gammp");
113 /*********************************************************************************************************************************/
114 //Numerical Recipes pg. 223
115 double LinearAlgebra::gammq(const double a, const double x) {
117 double gamser,gammcf,gln;
119 if (x < 0.0 || a <= 0.0) { m->mothurOut("[ERROR]: Invalid arguments in routine GAMMQ\n"); m->control_pressed = true; return 0.0; }
121 gser(gamser,a,x,gln);
130 catch(exception& e) {
131 m->errorOut(e, "LinearAlgebra", "gammp");
135 /*********************************************************************************************************************************/
136 //Numerical Recipes pg. 224
137 double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
140 const double EPS=numeric_limits<double>::epsilon();
141 const double FPMIN=numeric_limits<double>::min()/EPS;
143 double an,b,c,d,del,h;
150 for (i=1;i<=ITMAX;i++) {
154 if (fabs(d) < FPMIN) { d=FPMIN; }
156 if (fabs(c) < FPMIN) { c=FPMIN; }
160 if (fabs(del-1.0) <= EPS) break;
162 if (i > ITMAX) { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; }
163 gammcf=exp(-x+a*log(x)-gln)*h;
167 catch(exception& e) {
168 m->errorOut(e, "LinearAlgebra", "gcf");
173 /*********************************************************************************************************************************/
174 //Numerical Recipes pg. 223
175 double LinearAlgebra::gser(double& gamser, const double a, const double x, double& gln) {
179 const double EPS = numeric_limits<double>::epsilon();
183 if (x < 0.0) { m->mothurOut("[ERROR]: x less than 0 in routine GSER\n"); m->control_pressed = true; }
184 gamser=0.0; return 0.0;
188 for (n=0;n<100;n++) {
192 if (fabs(del) < fabs(sum)*EPS) {
193 gamser=sum*exp(-x+a*log(x)-gln);
198 m->mothurOut("[ERROR]: a too large, ITMAX too small in routine GSER\n");
203 catch(exception& e) {
204 m->errorOut(e, "LinearAlgebra", "gser");
208 /*********************************************************************************************************************************/
209 //Numerical Recipes pg. 233
210 double LinearAlgebra::betacf(const double a, const double b, const double x) {
212 const int MAXIT = 100;
213 const double EPS = numeric_limits<double>::epsilon();
214 const double FPMIN = numeric_limits<double>::min() / EPS;
216 double aa, c, d, del, h, qab, qam, qap;
223 if (fabs(d) < FPMIN) d=FPMIN;
226 for (m1=1;m1<=MAXIT;m1++) {
228 aa=m1*(b-m1)*x/((qam+m2)*(a+m2));
230 if (fabs(d) < FPMIN) d=FPMIN;
232 if (fabs(c) < FPMIN) c=FPMIN;
235 aa = -(a+m1)*(qab+m1)*x/((a+m2)*(qap+m2));
237 if (fabs(d) < FPMIN) d=FPMIN;
239 if (fabs(c) < FPMIN) c=FPMIN;
243 if (fabs(del-1.0) < EPS) break;
246 if (m1 > MAXIT) { m->mothurOut("[ERROR]: a or b too big or MAXIT too small in betacf."); m->mothurOutEndLine(); m->control_pressed = true; }
250 catch(exception& e) {
251 m->errorOut(e, "LinearAlgebra", "betacf");
255 /*********************************************************************************************************************************/
256 //[3][4] * [4][5] - columns in first must match rows in second, returns matrix[3][5]
257 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
259 vector<vector<double> > product;
261 int first_rows = first.size();
262 int first_cols = first[0].size();
263 int second_cols = second[0].size();
265 product.resize(first_rows);
266 for(int i=0;i<first_rows;i++){
267 product[i].resize(second_cols);
270 for(int i=0;i<first_rows;i++){
271 for(int j=0;j<second_cols;j++){
273 if (m->control_pressed) { return product; }
276 for(int k=0;k<first_cols;k++){
277 product[i][j] += first[i][k] * second[k][j];
284 catch(exception& e) {
285 m->errorOut(e, "LinearAlgebra", "matrix_mult");
290 /*********************************************************************************************************************************/
292 vector<vector<double> > LinearAlgebra::transpose(vector<vector<double> >matrix){
294 vector<vector<double> > trans; trans.resize(matrix[0].size());
295 for (int i = 0; i < trans.size(); i++) {
296 for (int j = 0; j < matrix.size(); j++) { trans[i].push_back(matrix[j][i]); }
301 catch(exception& e) {
302 m->errorOut(e, "LinearAlgebra", "transpose");
307 /*********************************************************************************************************************************/
309 void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
313 vector<vector<double> > A(rank);
314 vector<vector<double> > C(rank);
315 for(int i=0;i<rank;i++){
320 double scale = -1.0000 / (double) rank;
322 for(int i=0;i<rank;i++){
324 C[i][i] = 1.0000 + scale;
325 for(int j=i+1;j<rank;j++){
326 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
327 C[i][j] = C[j][i] = scale;
331 A = matrix_mult(C,A);
332 G = matrix_mult(A,C);
334 catch(exception& e) {
335 m->errorOut(e, "LinearAlgebra", "recenter");
340 /*********************************************************************************************************************************/
342 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
344 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
346 double scale, hh, h, g, f;
353 for(int i=n-1;i>0;i--){
357 for(int k=0;k<l+1;k++){
358 scale += fabs(a[i][k]);
364 for(int k=0;k<l+1;k++){
366 h += a[i][k] * a[i][k];
369 g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
374 for(int j=0;j<l+1;j++){
375 a[j][i] = a[i][j] / h;
377 for(int k=0;k<j+1;k++){
378 g += a[j][k] * a[i][k];
380 for(int k=j+1;k<l+1;k++){
381 g += a[k][j] * a[i][k];
387 for(int j=0;j<l+1;j++){
389 e[j] = g = e[j] - hh * f;
390 for(int k=0;k<j+1;k++){
391 a[j][k] -= (f * e[k] + g * a[i][k]);
406 for(int i=0;i<n;i++){
409 for(int j=0;j<l;j++){
411 for(int k=0;k<l;k++){
412 g += a[i][k] * a[k][j];
414 for(int k=0;k<l;k++){
415 a[k][j] -= g * a[k][i];
421 for(int j=0;j<l;j++){
422 a[j][i] = a[i][j] = 0.0;
428 catch(exception& e) {
429 m->errorOut(e, "LinearAlgebra", "tred2");
434 /*********************************************************************************************************************************/
436 double LinearAlgebra::pythag(double a, double b) { return(pow(a*a+b*b,0.5)); }
438 /*********************************************************************************************************************************/
440 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
442 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
445 double s, r, p, g, f, dd, c, b;
448 for(int i=1;i<=n;i++){
453 for(int l=0;l<n;l++){
456 for(myM=l;myM<n-1;myM++){
457 dd = fabs(d[myM]) + fabs(d[myM+1]);
458 if(fabs(e[myM])+dd == dd) break;
461 if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
462 g = (d[l+1]-d[l]) / (2.0 * e[l]);
464 g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
467 for(i=myM-1;i>=l;i--){
470 e[i+1] = (r=pythag(f,g));
479 r = (d[i] - g) * s + 2.0 * c * b;
480 d[i+1] = g + ( p = s * r);
482 for(int k=0;k<n;k++){
484 z[k][i+1] = s * z[k][i] + c * f;
485 z[k][i] = c * z[k][i] - s * f;
488 if(r == 0.00 && i >= l) continue;
497 for(int i=0;i<n;i++){
499 for(int j=i;j<n;j++){
507 for(int j=0;j<n;j++){
517 catch(exception& e) {
518 m->errorOut(e, "LinearAlgebra", "qtli");
522 /*********************************************************************************************************************************/
523 //groups by dimension
524 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
527 vector< vector<double> > dists; dists.resize(axes.size());
528 for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); }
530 if (dimensions == 1) { //one dimension calc = abs(x-y)
532 for (int i = 0; i < dists.size(); i++) {
534 if (m->control_pressed) { return dists; }
536 for (int j = 0; j < i; j++) {
537 dists[i][j] = abs(axes[i][0] - axes[j][0]);
538 dists[j][i] = dists[i][j];
542 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
544 for (int i = 0; i < dists.size(); i++) {
546 if (m->control_pressed) { return dists; }
548 for (int j = 0; j < i; j++) {
550 for (int k = 0; k < dimensions; k++) {
551 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
554 dists[i][j] = sqrt(sum);
555 dists[j][i] = dists[i][j];
563 catch(exception& e) {
564 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
568 /*********************************************************************************************************************************/
569 //returns groups by dimensions from dimensions by groups
570 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
573 vector< vector<double> > dists; dists.resize(axes[0].size());
574 for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes[0].size(), 0.0); }
576 if (axes.size() == 1) { //one dimension calc = abs(x-y)
578 for (int i = 0; i < dists.size(); i++) {
580 if (m->control_pressed) { return dists; }
582 for (int j = 0; j < i; j++) {
583 dists[i][j] = abs(axes[0][i] - axes[0][j]);
584 dists[j][i] = dists[i][j];
588 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
590 for (int i = 0; i < dists[0].size(); i++) {
592 if (m->control_pressed) { return dists; }
594 for (int j = 0; j < i; j++) {
596 for (int k = 0; k < axes.size(); k++) {
597 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
600 dists[i][j] = sqrt(sum);
601 dists[j][i] = dists[i][j];
609 catch(exception& e) {
610 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
614 /*********************************************************************************************************************************/
615 //assumes both matrices are square and the same size
616 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
619 //find average for - X
621 float averageEuclid = 0.0;
622 for (int i = 0; i < euclidDists.size(); i++) {
623 for (int j = 0; j < i; j++) {
624 averageEuclid += euclidDists[i][j];
628 averageEuclid = averageEuclid / (float) count;
630 //find average for - Y
632 float averageUser = 0.0;
633 for (int i = 0; i < userDists.size(); i++) {
634 for (int j = 0; j < i; j++) {
635 averageUser += userDists[i][j];
639 averageUser = averageUser / (float) count;
641 double numerator = 0.0;
642 double denomTerm1 = 0.0;
643 double denomTerm2 = 0.0;
645 for (int i = 0; i < euclidDists.size(); i++) {
647 for (int k = 0; k < i; k++) { //just lt dists
649 float Yi = userDists[i][k];
650 float Xi = euclidDists[i][k];
652 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
653 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
654 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
658 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
659 double r = numerator / denom;
661 //divide by zero error
662 if (isnan(r) || isinf(r)) { r = 0.0; }
667 catch(exception& e) {
668 m->errorOut(e, "LinearAlgebra", "calcPearson");
672 /*********************************************************************************************************************************/
673 //assumes both matrices are square and the same size
674 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
679 map<float, int> tableX;
680 map<float, int>::iterator itTable;
681 vector<spearmanRank> scores;
683 for (int i = 0; i < euclidDists.size(); i++) {
684 for (int j = 0; j < i; j++) {
685 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
686 scores.push_back(member);
688 //count number of repeats
689 itTable = tableX.find(euclidDists[i][j]);
690 if (itTable == tableX.end()) {
691 tableX[euclidDists[i][j]] = 1;
693 tableX[euclidDists[i][j]]++;
699 sort(scores.begin(), scores.end(), compareSpearman);
703 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
704 double tx = (double) itTable->second;
705 Lx += ((pow(tx, 3.0) - tx) / 12.0);
709 map<string, float> rankEuclid;
710 vector<spearmanRank> ties;
712 for (int j = 0; j < scores.size(); j++) {
714 ties.push_back(scores[j]);
716 if (j != (scores.size()-1)) { // you are not the last so you can look ahead
717 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
719 for (int k = 0; k < ties.size(); k++) {
720 float thisrank = rankTotal / (float) ties.size();
721 rankEuclid[ties[k].name] = thisrank;
726 }else { // you are the last one
728 for (int k = 0; k < ties.size(); k++) {
729 float thisrank = rankTotal / (float) ties.size();
730 rankEuclid[ties[k].name] = thisrank;
737 map<float, int> tableY;
740 for (int i = 0; i < userDists.size(); i++) {
741 for (int j = 0; j < i; j++) {
742 spearmanRank member(toString(scores.size()), userDists[i][j]);
743 scores.push_back(member);
745 //count number of repeats
746 itTable = tableY.find(userDists[i][j]);
747 if (itTable == tableY.end()) {
748 tableY[userDists[i][j]] = 1;
750 tableY[userDists[i][j]]++;
756 sort(scores.begin(), scores.end(), compareSpearman);
760 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
761 double ty = (double) itTable->second;
762 Ly += ((pow(ty, 3.0) - ty) / 12.0);
766 map<string, float> rankUser;
769 for (int j = 0; j < scores.size(); j++) {
771 ties.push_back(scores[j]);
773 if (j != (scores.size()-1)) { // you are not the last so you can look ahead
774 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
776 for (int k = 0; k < ties.size(); k++) {
777 float thisrank = rankTotal / (float) ties.size();
778 rankUser[ties[k].name] = thisrank;
783 }else { // you are the last one
785 for (int k = 0; k < ties.size(); k++) {
786 float thisrank = rankTotal / (float) ties.size();
787 rankUser[ties[k].name] = thisrank;
795 for (int i = 0; i < userDists.size(); i++) {
796 for (int j = 0; j < i; j++) {
798 float xi = rankEuclid[toString(count)];
799 float yi = rankUser[toString(count)];
801 di += ((xi - yi) * (xi - yi));
807 double n = (double) count;
809 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
810 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
812 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
814 //divide by zero error
815 if (isnan(r) || isinf(r)) { r = 0.0; }
820 catch(exception& e) {
821 m->errorOut(e, "LinearAlgebra", "calcSpearman");
826 /*********************************************************************************************************************************/
827 //assumes both matrices are square and the same size
828 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
833 vector<spearmanRank> scores;
834 for (int i = 0; i < euclidDists.size(); i++) {
835 for (int j = 0; j < i; j++) {
836 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
837 scores.push_back(member);
842 sort(scores.begin(), scores.end(), compareSpearman);
845 map<string, float> rankEuclid;
846 vector<spearmanRank> ties;
848 for (int j = 0; j < scores.size(); j++) {
850 ties.push_back(scores[j]);
852 if (j != (scores.size()-1)) { // you are not the last so you can look ahead
853 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
855 for (int k = 0; k < ties.size(); k++) {
856 float thisrank = rankTotal / (float) ties.size();
857 rankEuclid[ties[k].name] = thisrank;
862 }else { // you are the last one
864 for (int k = 0; k < ties.size(); k++) {
865 float thisrank = rankTotal / (float) ties.size();
866 rankEuclid[ties[k].name] = thisrank;
871 vector<spearmanRank> scoresUser;
872 for (int i = 0; i < userDists.size(); i++) {
873 for (int j = 0; j < i; j++) {
874 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
875 scoresUser.push_back(member);
880 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);
883 map<string, float> rankUser;
886 for (int j = 0; j < scoresUser.size(); j++) {
888 ties.push_back(scoresUser[j]);
890 if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
891 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
893 for (int k = 0; k < ties.size(); k++) {
894 float thisrank = rankTotal / (float) ties.size();
895 rankUser[ties[k].name] = thisrank;
900 }else { // you are the last one
902 for (int k = 0; k < ties.size(); k++) {
903 float thisrank = rankTotal / (float) ties.size();
904 rankUser[ties[k].name] = thisrank;
913 vector<spearmanRank> user;
914 for (int l = 0; l < scores.size(); l++) {
915 spearmanRank member(scores[l].name, rankUser[scores[l].name]);
916 user.push_back(member);
920 for (int l = 0; l < scores.size(); l++) {
922 int numWithHigherRank = 0;
923 int numWithLowerRank = 0;
924 float thisrank = user[l].score;
926 for (int u = l+1; u < scores.size(); u++) {
927 if (user[u].score > thisrank) { numWithHigherRank++; }
928 else if (user[u].score < thisrank) { numWithLowerRank++; }
932 numCoor += numWithHigherRank;
933 numDisCoor += numWithLowerRank;
936 r = (numCoor - numDisCoor) / (float) count;
938 //divide by zero error
939 if (isnan(r) || isinf(r)) { r = 0.0; }
944 catch(exception& e) {
945 m->errorOut(e, "LinearAlgebra", "calcKendall");
949 /*********************************************************************************************************************************/
950 double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double& sig){
952 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
955 vector<spearmanRank> xscores;
956 for (int i = 0; i < x.size(); i++) {
957 spearmanRank member(toString(i), x[i]);
958 xscores.push_back(member);
962 sort(xscores.begin(), xscores.end(), compareSpearman);
964 //convert scores to ranks of x
965 vector<spearmanRank*> ties;
967 for (int j = 0; j < xscores.size(); j++) {
969 ties.push_back(&(xscores[j]));
971 if (j != xscores.size()-1) { // you are not the last so you can look ahead
972 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
973 for (int k = 0; k < ties.size(); k++) {
974 float thisrank = rankTotal / (float) ties.size();
975 (*ties[k]).score = thisrank;
980 }else { // you are the last one
981 for (int k = 0; k < ties.size(); k++) {
982 float thisrank = rankTotal / (float) ties.size();
983 (*ties[k]).score = thisrank;
990 vector<spearmanRank> yscores;
991 for (int j = 0; j < y.size(); j++) {
992 spearmanRank member(toString(j), y[j]);
993 yscores.push_back(member);
997 sort(yscores.begin(), yscores.end(), compareSpearman);
1000 map<string, float> rank;
1001 vector<spearmanRank> yties;
1003 for (int j = 0; j < yscores.size(); j++) {
1005 yties.push_back(yscores[j]);
1007 if (j != yscores.size()-1) { // you are not the last so you can look ahead
1008 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
1009 for (int k = 0; k < yties.size(); k++) {
1010 float thisrank = rankTotal / (float) yties.size();
1011 rank[yties[k].name] = thisrank;
1016 }else { // you are the last one
1017 for (int k = 0; k < yties.size(); k++) {
1018 float thisrank = rankTotal / (float) yties.size();
1019 rank[yties[k].name] = thisrank;
1029 vector<spearmanRank> otus;
1030 for (int l = 0; l < xscores.size(); l++) {
1031 spearmanRank member(xscores[l].name, rank[xscores[l].name]);
1032 otus.push_back(member);
1036 for (int l = 0; l < xscores.size(); l++) {
1038 int numWithHigherRank = 0;
1039 int numWithLowerRank = 0;
1040 float thisrank = otus[l].score;
1042 for (int u = l+1; u < xscores.size(); u++) {
1043 if (otus[u].score > thisrank) { numWithHigherRank++; }
1044 else if (otus[u].score < thisrank) { numWithLowerRank++; }
1048 numCoor += numWithHigherRank;
1049 numDisCoor += numWithLowerRank;
1052 double p = (numCoor - numDisCoor) / (float) count;
1054 sig = calcKendallSig(x.size(), p);
1058 catch(exception& e) {
1059 m->errorOut(e, "LinearAlgebra", "calcKendall");
1063 double LinearAlgebra::ran0(int& idum)
1065 const int IA=16807,IM=2147483647,IQ=127773;
1066 const int IR=2836,MASK=123459876;
1067 const double AM=1.0/double(IM);
1073 idum=IA*(idum-k*IQ)-IR*k;
1074 if (idum < 0) idum += IM;
1080 double LinearAlgebra::ran1(int &idum)
1082 const int IA=16807,IM=2147483647,IQ=127773,IR=2836,NTAB=32;
1083 const int NDIV=(1+(IM-1)/NTAB);
1084 const double EPS=3.0e-16,AM=1.0/IM,RNMX=(1.0-EPS);
1086 static vector<int> iv(NTAB);
1090 if (idum <= 0 || !iy) {
1091 if (-idum < 1) idum=1;
1093 for (j=NTAB+7;j>=0;j--) {
1095 idum=IA*(idum-k*IQ)-IR*k;
1096 if (idum < 0) idum += IM;
1097 if (j < NTAB) iv[j] = idum;
1102 idum=IA*(idum-k*IQ)-IR*k;
1103 if (idum < 0) idum += IM;
1107 if ((temp=AM*iy) > RNMX) return RNMX;
1111 double LinearAlgebra::ran2(int &idum)
1113 const int IM1=2147483563,IM2=2147483399;
1114 const int IA1=40014,IA2=40692,IQ1=53668,IQ2=52774;
1115 const int IR1=12211,IR2=3791,NTAB=32,IMM1=IM1-1;
1116 const int NDIV=1+IMM1/NTAB;
1117 const double EPS=3.0e-16,RNMX=1.0-EPS,AM=1.0/double(IM1);
1118 static int idum2=123456789,iy=0;
1119 static vector<int> iv(NTAB);
1124 idum=(idum==0 ? 1 : -idum);
1126 for (j=NTAB+7;j>=0;j--) {
1128 idum=IA1*(idum-k*IQ1)-k*IR1;
1129 if (idum < 0) idum += IM1;
1130 if (j < NTAB) iv[j] = idum;
1135 idum=IA1*(idum-k*IQ1)-k*IR1;
1136 if (idum < 0) idum += IM1;
1138 idum2=IA2*(idum2-k*IQ2)-k*IR2;
1139 if (idum2 < 0) idum2 += IM2;
1143 if (iy < 1) iy += IMM1;
1144 if ((temp=AM*iy) > RNMX) return RNMX;
1148 double LinearAlgebra::ran3(int &idum)
1150 static int inext,inextp;
1152 const int MBIG=1000000000,MSEED=161803398,MZ=0;
1153 const double FAC=(1.0/MBIG);
1154 static vector<int> ma(56);
1157 if (idum < 0 || iff == 0) {
1159 mj=labs(MSEED-labs(idum));
1163 for (i=1;i<=54;i++) {
1167 if (mk < int(MZ)) mk += MBIG;
1171 for (i=1;i<=55;i++) {
1172 ma[i] -= ma[1+(i+30) % 55];
1173 if (ma[i] < int(MZ)) ma[i] += MBIG;
1179 if (++inext == 56) inext=1;
1180 if (++inextp == 56) inextp=1;
1181 mj=ma[inext]-ma[inextp];
1182 if (mj < int(MZ)) mj += MBIG;
1187 double LinearAlgebra::ran4(int &idum)
1189 #if defined(vax) || defined(_vax_) || defined(__vax__) || defined(VAX)
1190 static const unsigned long jflone = 0x00004080;
1191 static const unsigned long jflmsk = 0xffff007f;
1193 static const unsigned long jflone = 0x3f800000;
1194 static const unsigned long jflmsk = 0x007fffff;
1196 unsigned long irword,itemp,lword;
1197 static int idums = 0;
1205 psdes(lword,irword);
1206 itemp=jflone | (jflmsk & irword);
1208 return (*(float *)&itemp)-1.0;
1211 void LinearAlgebra::psdes(unsigned long &lword, unsigned long &irword)
1214 static const unsigned long c1[NITER]={
1215 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L};
1216 static const unsigned long c2[NITER]={
1217 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L};
1218 unsigned long i,ia,ib,iswap,itmph=0,itmpl=0;
1220 for (i=0;i<NITER;i++) {
1221 ia=(iswap=irword) ^ c1[i];
1222 itmpl = ia & 0xffff;
1224 ib=itmpl*itmpl+ ~(itmph*itmph);
1225 irword=lword ^ (((ia = (ib >> 16) |
1226 ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph);
1230 /*********************************************************************************************************************************/
1231 double LinearAlgebra::calcKendallSig(double n, double r){
1235 double svar=(4.0*n+10.0)/(9.0*n*(n-1.0));
1236 double z= r/sqrt(svar);
1237 sig=erfcc(fabs(z)/1.4142136);
1239 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
1243 catch(exception& e) {
1244 m->errorOut(e, "LinearAlgebra", "calcKendallSig");
1248 /*********************************************************************************************************************************/
1249 double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pValue){
1252 set<string> treatments;
1255 sort(values.begin(), values.end(), compareSpearman);
1256 vector<spearmanRank*> ties;
1259 for (int j = 0; j < values.size(); j++) {
1260 treatments.insert(values[j].name);
1262 ties.push_back(&(values[j]));
1264 if (j != values.size()-1) { // you are not the last so you can look ahead
1265 if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue
1266 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1267 for (int k = 0; k < ties.size(); k++) {
1268 double thisrank = rankTotal / (double) ties.size();
1269 (*ties[k]).score = thisrank;
1274 }else { // you are the last one
1275 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1276 for (int k = 0; k < ties.size(); k++) {
1277 double thisrank = rankTotal / (double) ties.size();
1278 (*ties[k]).score = thisrank;
1284 // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1)
1285 map<string, double> sums;
1286 map<string, double> counts;
1287 for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; }
1289 for (int j = 0; j < values.size(); j++) {
1290 sums[values[j].name] += values[j].score;
1291 counts[values[j].name]+= 1.0;
1294 double middleTerm = 0.0;
1295 for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) {
1296 middleTerm += ((sums[*it]*sums[*it])/counts[*it]);
1299 double firstTerm = 12 / (double) (values.size()*(values.size()+1));
1300 double lastTerm = 3 * (values.size()+1);
1302 H = firstTerm * middleTerm - lastTerm;
1305 if (TIES.size() != 0) {
1307 for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
1308 double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size()));
1312 //Numerical Recipes pg221
1313 pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
1317 catch(exception& e) {
1318 m->errorOut(e, "LinearAlgebra", "calcKruskalWallis");
1322 /*********************************************************************************************************************************/
1323 //python random.normalvariate - thanks http://madssj.com/blog/2008/05/07/porting-normalvariate-from-python-to-c/
1324 double LinearAlgebra::normalvariate(double mu, double sigma) {
1326 double NV_MAGICCONST = 1.7155277699214135; /* (4 * exp(-0.5) / sqrt(2.0)); */
1327 unsigned long int MAX_RANDOM = 2147483647; /* (2 ** 31) - 1; */
1329 double u1, u2, z, zz;
1331 if (m->control_pressed) { break; }
1332 u1 = ((float)random()) / MAX_RANDOM;
1333 u2 = 1.0 - (((float)random()) / MAX_RANDOM);
1334 z = NV_MAGICCONST * (u1 - 0.5) / u2;
1336 if (zz <= -(log(u2))) {
1340 return mu + z * sigma;
1342 catch(exception& e) {
1343 m->errorOut(e, "LinearAlgebra", "normalvariate");
1347 /*********************************************************************************************************************************/
1348 //thanks http://www.johndcook.com/cpp_phi.html
1349 double LinearAlgebra::pnorm(double x){
1352 double a1 = 0.254829592;
1353 double a2 = -0.284496736;
1354 double a3 = 1.421413741;
1355 double a4 = -1.453152027;
1356 double a5 = 1.061405429;
1357 double p = 0.3275911;
1359 // Save the sign of x
1363 x = fabs(x)/sqrt(2.0);
1365 // A&S formula 7.1.26
1366 double t = 1.0/(1.0 + p*x);
1367 double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x);
1369 return 0.5*(1.0 + sign*y);
1371 catch(exception& e) {
1372 m->errorOut(e, "LinearAlgebra", "pnorm");
1377 /*********************************************************************************************************************************/
1378 double LinearAlgebra::calcWilcoxon(vector<double>& x, vector<double>& y, double& sig){
1383 vector<spearmanRank> ranks;
1384 for (int i = 0; i < x.size(); i++) {
1385 if (m->control_pressed) { return W; }
1386 spearmanRank member("x", x[i]);
1387 ranks.push_back(member);
1390 for (int i = 0; i < y.size(); i++) {
1391 if (m->control_pressed) { return W; }
1392 spearmanRank member("y", y[i]);
1393 ranks.push_back(member);
1397 sort(ranks.begin(), ranks.end(), compareSpearman);
1399 //convert scores to ranks of x
1400 vector<spearmanRank*> ties;
1403 for (int j = 0; j < ranks.size(); j++) {
1404 if (m->control_pressed) { return W; }
1406 ties.push_back(&(ranks[j]));
1408 if (j != ranks.size()-1) { // you are not the last so you can look ahead
1409 if (ranks[j].score != ranks[j+1].score) { // you are done with ties, rank them and continue
1410 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1411 for (int k = 0; k < ties.size(); k++) {
1412 float thisrank = rankTotal / (float) ties.size();
1413 (*ties[k]).score = thisrank;
1418 }else { // you are the last one
1419 if (ties.size() > 1) { TIES.push_back(ties.size()); }
1420 for (int k = 0; k < ties.size(); k++) {
1421 float thisrank = rankTotal / (float) ties.size();
1422 (*ties[k]).score = thisrank;
1427 //from R wilcox.test function
1428 //STATISTIC <- sum(r[seq_along(x)]) - n.x * (n.x + 1)/2
1429 double sumRanks = 0.0;
1430 for (int i = 0; i < ranks.size(); i++) {
1431 if (m->control_pressed) { return W; }
1432 if (ranks[i].name == "x") { sumRanks += ranks[i].score; }
1435 W = sumRanks - x.size() * ((double)(x.size() + 1)) / 2.0;
1437 //exact <- (n.x < 50) && (n.y < 50)
1438 bool findExact = false;
1439 if ((x.size() < 50) && (y.size() < 50)) { findExact = true; }
1442 if (findExact && (TIES.size() == 0)) { //find exact and no ties
1443 //PVAL <- switch(alternative, two.sided = {
1444 //p <- if (STATISTIC > (n.x * n.y/2))
1447 if (W > ((double)x.size()*y.size()/2.0)) {
1448 //pwilcox(STATISTIC-1, n.x, n.y, lower.tail = FALSE)
1449 pval = wilcox.pwilcox(W-1, x.size(), y.size(), false);
1451 //pwilcox(STATISTIC,n.x, n.y)
1452 pval = wilcox.pwilcox(W, x.size(), y.size(), true);
1455 if (1.0 < sig) { sig = 1.0; }
1457 //z <- STATISTIC - n.x * n.y/2
1458 double z = W - (double)(x.size() * y.size()/2.0);
1461 for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
1463 //SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) -
1464 //sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y -
1467 double firstTerm = (double)(x.size() * y.size()/12.0);
1468 double secondTerm = (double)(x.size() + y.size() + 1) - sum / (double)((x.size() + y.size()) * (x.size() + y.size() - 1));
1469 sigma = sqrt(firstTerm * secondTerm);
1471 //CORRECTION <- switch(alternative, two.sided = sign(z) * 0.5, greater = 0.5, less = -0.5)
1472 double CORRECTION = 0.0;
1473 if (z < 0) { CORRECTION = -1.0; }
1474 else if (z > 0) { CORRECTION = 1.0; }
1477 z = (z - CORRECTION)/sigma;
1479 //PVAL <- switch(alternative, two.sided = 2 * min(pnorm(z), pnorm(z, lower.tail = FALSE)))
1481 if ((1.0-sig) < sig) { sig = 1.0 - sig; }
1487 catch(exception& e) {
1488 m->errorOut(e, "LinearAlgebra", "calcWilcoxon");
1493 /*********************************************************************************************************************************/
1494 double LinearAlgebra::choose(double n, double k){
1499 double lchoose = gammln(n + 1.0) - gammln(k + 1.0) - gammln(n - k + 1.0);
1501 return (floor(exp(lchoose) + 0.5));
1503 catch(exception& e) {
1504 m->errorOut(e, "LinearAlgebra", "choose");
1508 /*********************************************************************************************************************************/
1509 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
1511 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
1514 double sf = 0.0; //f^3 - f where f is the number of ties in x;
1515 double sg = 0.0; //f^3 - f where f is the number of ties in y;
1516 map<float, int> tableX;
1517 map<float, int>::iterator itTable;
1518 vector<spearmanRank> xscores;
1520 for (int i = 0; i < x.size(); i++) {
1521 spearmanRank member(toString(i), x[i]);
1522 xscores.push_back(member);
1524 //count number of repeats
1525 itTable = tableX.find(x[i]);
1526 if (itTable == tableX.end()) {
1536 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
1537 double tx = (double) itTable->second;
1538 Lx += ((pow(tx, 3.0) - tx) / 12.0);
1543 sort(xscores.begin(), xscores.end(), compareSpearman);
1545 //convert scores to ranks of x
1547 map<string, float> rankx;
1548 vector<spearmanRank> xties;
1550 for (int j = 0; j < xscores.size(); j++) {
1552 xties.push_back(xscores[j]);
1554 if (j != xscores.size()-1) { // you are not the last so you can look ahead
1555 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
1556 for (int k = 0; k < xties.size(); k++) {
1557 float thisrank = rankTotal / (float) xties.size();
1558 rankx[xties[k].name] = thisrank;
1560 int t = xties.size();
1565 }else { // you are the last one
1566 for (int k = 0; k < xties.size(); k++) {
1567 float thisrank = rankTotal / (float) xties.size();
1568 rankx[xties[k].name] = thisrank;
1574 vector<spearmanRank> yscores;
1575 map<float, int> tableY;
1576 for (int j = 0; j < y.size(); j++) {
1577 spearmanRank member(toString(j), y[j]);
1578 yscores.push_back(member);
1580 itTable = tableY.find(member.score);
1581 if (itTable == tableY.end()) {
1582 tableY[member.score] = 1;
1584 tableY[member.score]++;
1591 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
1592 double ty = (double) itTable->second;
1593 Ly += ((pow(ty, 3.0) - ty) / 12.0);
1596 sort(yscores.begin(), yscores.end(), compareSpearman);
1599 map<string, float> rank;
1600 vector<spearmanRank> yties;
1602 for (int j = 0; j < yscores.size(); j++) {
1604 yties.push_back(yscores[j]);
1606 if (j != yscores.size()-1) { // you are not the last so you can look ahead
1607 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
1608 for (int k = 0; k < yties.size(); k++) {
1609 float thisrank = rankTotal / (float) yties.size();
1610 rank[yties[k].name] = thisrank;
1612 int t = yties.size();
1617 }else { // you are the last one
1618 for (int k = 0; k < yties.size(); k++) {
1619 float thisrank = rankTotal / (float) yties.size();
1620 rank[yties[k].name] = thisrank;
1626 for (int k = 0; k < x.size(); k++) {
1628 float xi = rankx[toString(k)];
1629 float yi = rank[toString(k)];
1631 di += ((xi - yi) * (xi - yi));
1636 double n = (double) x.size();
1637 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
1638 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
1640 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
1642 //Numerical Recipes 646
1643 sig = calcSpearmanSig(n, sf, sg, di);
1647 catch(exception& e) {
1648 m->errorOut(e, "LinearAlgebra", "calcSpearman");
1652 /*********************************************************************************************************************************/
1653 double LinearAlgebra::calcSpearmanSig(double n, double sf, double sg, double d){
1657 double probrs = 0.0;
1659 double en3n=en*en*en-en;
1660 double aved=en3n/6.0-(sf+sg)/12.0;
1661 double fac=(1.0-sf/en3n)*(1.0-sg/en3n);
1662 double vard=((en-1.0)*en*en*SQR(en+1.0)/36.0)*fac;
1663 double zd=(d-aved)/sqrt(vard);
1664 double probd=erfcc(fabs(zd)/1.4142136);
1665 double rs=(1.0-(6.0/en3n)*(d+(sf+sg)/12.0))/sqrt(fac);
1666 fac=(rs+1.0)*(1.0-rs);
1668 double t=rs*sqrt((en-2.0)/fac);
1670 probrs=betai(0.5*df,0.5,df/(df+t*t));
1675 //smaller of probd and probrs is sig
1677 if (probd < probrs) { sig = probd; }
1679 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
1683 catch(exception& e) {
1684 m->errorOut(e, "LinearAlgebra", "calcSpearmanSig");
1688 /*********************************************************************************************************************************/
1689 double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double& sig){
1691 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
1694 float averageX = 0.0;
1695 for (int i = 0; i < x.size(); i++) { averageX += x[i]; }
1696 averageX = averageX / (float) x.size();
1700 for (int j = 0; j < y.size(); j++) { sumY += y[j]; }
1701 float Ybar = sumY / (float) y.size();
1704 double numerator = 0.0;
1705 double denomTerm1 = 0.0;
1706 double denomTerm2 = 0.0;
1708 for (int j = 0; j < x.size(); j++) {
1712 numerator += ((Xi - averageX) * (Yi - Ybar));
1713 denomTerm1 += ((Xi - averageX) * (Xi - averageX));
1714 denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
1717 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
1719 r = numerator / denom;
1721 //Numerical Recipes pg.644
1722 sig = calcPearsonSig(x.size(), r);
1726 catch(exception& e) {
1727 m->errorOut(e, "LinearAlgebra", "calcPearson");
1731 /*********************************************************************************************************************************/
1732 double LinearAlgebra::calcPearsonSig(double n, double r){
1736 const double TINY = 1.0e-20;
1737 double z = 0.5*log((1.0+r+TINY)/(1.0-r+TINY)); //Fisher's z transformation
1739 //code below was giving an error in betacf with sop files
1741 //double t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)));
1742 //sig = betai(0.5+df, 0.5, df/(df+t*t));
1744 //Numerical Recipes says code below gives approximately the same result
1745 sig = erfcc(fabs(z*sqrt(n-1.0))/1.4142136);
1746 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
1750 catch(exception& e) {
1751 m->errorOut(e, "LinearAlgebra", "calcPearsonSig");
1755 /*********************************************************************************************************************************/
1757 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
1760 int numSamples = relAbundData.size();
1761 int numOTUs = relAbundData[0].size();
1763 vector<vector<double> > dMatrix(numSamples);
1764 for(int i=0;i<numSamples;i++){
1765 dMatrix[i].resize(numSamples);
1768 for(int i=0;i<numSamples;i++){
1769 for(int j=0;j<numSamples;j++){
1771 if (m->control_pressed) { return dMatrix; }
1774 for(int k=0;k<numOTUs;k++){
1775 d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
1777 dMatrix[i][j] = pow(d, 0.50000);
1778 dMatrix[j][i] = dMatrix[i][j];
1784 catch(exception& e) {
1785 m->errorOut(e, "LinearAlgebra", "getObservedEuclideanDistance");
1790 /*********************************************************************************************************************************/
1791 vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<double> b){
1793 int length = (int)b.size();
1794 vector<double> x(length, 0);
1795 vector<int> index(length);
1796 for(int i=0;i<length;i++){ index[i] = i; }
1799 ludcmp(A, index, d); if (m->control_pressed) { return b; }
1800 lubksb(A, index, b);
1804 catch(exception& e) {
1805 m->errorOut(e, "LinearAlgebra", "solveEquations");
1809 /*********************************************************************************************************************************/
1810 vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
1812 int length = (int)b.size();
1813 vector<double> x(length, 0);
1814 vector<int> index(length);
1815 for(int i=0;i<length;i++){ index[i] = i; }
1818 ludcmp(A, index, d); if (m->control_pressed) { return b; }
1819 lubksb(A, index, b);
1823 catch(exception& e) {
1824 m->errorOut(e, "LinearAlgebra", "solveEquations");
1829 /*********************************************************************************************************************************/
1831 void LinearAlgebra::ludcmp(vector<vector<double> >& A, vector<int>& index, double& d){
1833 double tiny = 1e-20;
1835 int n = (int)A.size();
1836 vector<double> vv(n, 0.0);
1842 for(int i=0;i<n;i++){
1844 for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
1845 if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
1849 for(int j=0;j<n;j++){
1850 if (m->control_pressed) { break; }
1851 for(int i=0;i<j;i++){
1852 double sum = A[i][j];
1853 for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
1858 for(int i=j;i<n;i++){
1859 double sum = A[i][j];
1860 for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
1863 if((dum = vv[i] * fabs(sum)) >= big){
1869 for(int k=0;k<n;k++){
1870 double dum = A[imax][k];
1871 A[imax][k] = A[j][k];
1879 if(A[j][j] == 0.0){ A[j][j] = tiny; }
1882 double dum = 1.0/A[j][j];
1883 for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
1887 catch(exception& e) {
1888 m->errorOut(e, "LinearAlgebra", "ludcmp");
1893 /*********************************************************************************************************************************/
1895 void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vector<double>& b){
1898 int n = (int)A.size();
1901 for(int i=0;i<n;i++){
1902 if (m->control_pressed) { break; }
1908 for(int j=ii-1;j<i;j++){
1909 total -= A[i][j] * b[j];
1912 else if(total != 0){ ii = i+1; }
1915 for(int i=n-1;i>=0;i--){
1917 for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
1918 b[i] = total / A[i][i];
1921 catch(exception& e) {
1922 m->errorOut(e, "LinearAlgebra", "lubksb");
1926 /*********************************************************************************************************************************/
1928 void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
1930 double tiny = 1e-20;
1932 int n = (int)A.size();
1933 vector<float> vv(n, 0.0);
1939 for(int i=0;i<n;i++){
1941 for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
1942 if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
1946 for(int j=0;j<n;j++){
1947 if (m->control_pressed) { break; }
1948 for(int i=0;i<j;i++){
1949 float sum = A[i][j];
1950 for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
1955 for(int i=j;i<n;i++){
1956 float sum = A[i][j];
1957 for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
1960 if((dum = vv[i] * fabs(sum)) >= big){
1966 for(int k=0;k<n;k++){
1967 float dum = A[imax][k];
1968 A[imax][k] = A[j][k];
1976 if(A[j][j] == 0.0){ A[j][j] = tiny; }
1979 float dum = 1.0/A[j][j];
1980 for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
1984 catch(exception& e) {
1985 m->errorOut(e, "LinearAlgebra", "ludcmp");
1990 /*********************************************************************************************************************************/
1992 void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
1995 int n = (int)A.size();
1998 for(int i=0;i<n;i++){
1999 if (m->control_pressed) { break; }
2005 for(int j=ii-1;j<i;j++){
2006 total -= A[i][j] * b[j];
2009 else if(total != 0){ ii = i+1; }
2012 for(int i=n-1;i>=0;i--){
2014 for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
2015 b[i] = total / A[i][i];
2018 catch(exception& e) {
2019 m->errorOut(e, "LinearAlgebra", "lubksb");
2024 /*********************************************************************************************************************************/
2026 vector<vector<double> > LinearAlgebra::getInverse(vector<vector<double> > matrix){
2028 int n = (int)matrix.size();
2030 vector<vector<double> > inverse(n);
2031 for(int i=0;i<n;i++){ inverse[i].assign(n, 0.0000); }
2033 vector<double> column(n, 0.0000);
2034 vector<int> index(n, 0);
2037 ludcmp(matrix, index, dummy);
2039 for(int j=0;j<n;j++){
2040 if (m->control_pressed) { break; }
2042 column.assign(n, 0);
2046 lubksb(matrix, index, column);
2048 for(int i=0;i<n;i++){ inverse[i][j] = column[i]; }
2053 catch(exception& e) {
2054 m->errorOut(e, "LinearAlgebra", "getInverse");
2058 /*********************************************************************************************************************************/
2059 //modelled R lda function - MASS:::lda.default
2060 vector< vector<double> > LinearAlgebra::lda(vector< vector<double> >& a, vector<string> groups, vector< vector<double> >& means, bool& ignore) {
2063 set<string> uniqueGroups;
2064 for (int i = 0; i < groups.size(); i++) { uniqueGroups.insert(groups[i]); }
2065 int numGroups = uniqueGroups.size();
2067 map<string, int> quickIndex; //className to index. hoping to save counts, proportions and means in vectors to save time. This map will allow us to know index 0 in counts refers to group1.
2069 for (set<string>::iterator it = uniqueGroups.begin(); it != uniqueGroups.end(); it++) { quickIndex[*it] = count; count++; }
2071 int numSampled = groups.size(); //number of sampled groups
2072 int numOtus = a.size(); //number of flagged bins
2074 //counts <- as.vector(table(g)) //number of samples from each class in random sampling
2075 vector<int> counts; counts.resize(numGroups, 0);
2076 for (int i = 0; i < groups.size(); i++) {
2077 counts[quickIndex[groups[i]]]++;
2080 vector<double> proportions; proportions.resize(numGroups, 0.0);
2081 for (int i = 0; i < numGroups; i++) { proportions[i] = counts[i] / (double) numSampled; }
2083 means.clear(); //means[0] -> means[0][0] average for [group0][OTU0].
2084 means.resize(numGroups); for (int i = 0; i < means.size(); i++) { means[i].resize(numOtus, 0.0); }
2085 for (int j = 0; j < numSampled; j++) { //total for each class for each OTU
2086 for (int i = 0; i < numOtus; i++) { means[quickIndex[groups[j]]][i] += a[i][j]; }
2088 //average for each class for each OTU
2089 for (int j = 0; j < numGroups; j++) { for (int i = 0; i < numOtus; i++) { means[j][i] /= counts[j]; } }
2091 //randCov <- x - group.means[g, ]
2092 vector< vector<double> > randCov; //randCov[0][0] -> (random sample value0 for OTU0 - average for samples group in OTU0). example OTU0, random sample 0.01 from class early. average of class early for OTU0 is 0.005. randCov[0][0] = (0.01-0.005)
2093 for (int i = 0; i < numOtus; i++) { //for each flagged OTU
2094 vector<double> tempRand;
2095 for (int j = 0; j < numSampled; j++) { tempRand.push_back(a[i][j] - means[quickIndex[groups[j]]][i]); }
2096 randCov.push_back(tempRand);
2099 //find variance and std for each OTU
2100 //f1 <- sqrt(diag(var(x - group.means[g, ])))
2101 vector<double> stdF1;
2103 for (int i = 0; i < numOtus; i++) {
2104 stdF1.push_back(0.0);
2105 ave.push_back(m->getAverage(randCov[i]));
2108 for (int i = 0; i < numOtus; i++) {
2109 for (int j = 0; j < numSampled; j++) { stdF1[i] += ((randCov[i][j] - ave[i]) * (randCov[i][j] - ave[i])); }
2113 double fac = 1 / (double) (numSampled-numGroups);
2115 for (int i = 0; i < stdF1.size(); i++) {
2116 stdF1[i] /= (double) (numSampled-1);
2117 stdF1[i] = sqrt(stdF1[i]);
2120 vector< vector<double> > scaling; //[numOTUS][numOTUS]
2121 for (int i = 0; i < numOtus; i++) {
2122 vector<double> temp;
2123 for (int j = 0; j < numOtus; j++) {
2124 if (i == j) { temp.push_back(1.0/stdF1[i]); }
2125 else { temp.push_back(0.0); }
2128 scaling.push_back(temp);
2131 cout << "scaling = " << endl;
2132 for (int i = 0; i < scaling.size(); i++) {
2133 for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
2137 //X <- sqrt(fac) * ((x - group.means[g, ]) %*% scaling)
2138 vector< vector<double> > X = randCov; //[numOTUS][numSampled]
2139 //((x - group.means[g, ]) %*% scaling)
2140 //matrix multiplication of randCov and scaling
2141 LinearAlgebra linear;
2142 X = linear.matrix_mult(scaling, randCov); //[numOTUS][numOTUS] * [numOTUS][numSampled] = [numOTUS][numSampled]
2145 for (int i = 0; i < X.size(); i++) {
2146 for (int j = 0; j < X[i].size(); j++) { X[i][j] *= fac; }
2150 vector< vector<double> > v;
2151 vector< vector<double> > Xcopy; //X = [numOTUS][numSampled]
2152 bool transpose = false; //svd requires rows < columns, so if they are not then I need to transpose and look for the results in v.
2153 if (X.size() < X[0].size()) { Xcopy = linear.transpose(X); transpose=true; }
2155 linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below, because R's version is [numSampled][numOTUS]
2157 /*cout << "Xcopy = " << endl;
2158 for (int i = 0; i < Xcopy.size(); i++) {
2159 for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
2162 cout << "v = " << endl;
2163 for (int i = 0; i < v.size(); i++) {
2164 for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; }
2170 set<int> goodColumns;
2171 //cout << "d = " << endl;
2172 for (int i = 0; i < d.size(); i++) { if (d[i] > 0.0000000001) { rank++; goodColumns.insert(i); } } //cout << d[i] << endl;
2175 ignore=true; //m->mothurOut("[ERROR]: rank = 0: variables are numerically const\n"); m->control_pressed = true;
2178 //scaling <- scaling %*% X.s$v[, 1L:rank] %*% diag(1/X.s$d[1L:rank], , rank)
2179 //X.s$v[, 1L:rank] = columns in Xcopy that correspond to "good" d values
2180 //diag(1/X.s$d[1L:rank], , rank) = matrix size rank * rank where the diagonal is 1/"good" dvalues
2183 [1] 3.721545e+00 3.034607e+00 2.296649e+00 7.986927e-16 6.922408e-16
2187 [,1] [,2] [,3] [,4] [,5] [,6]
2188 [1,] 0.31122175 0.10944725 0.20183340 -0.30136820 0.60786235 -0.13537095
2189 [2,] -0.29563726 -0.20568893 0.11233366 -0.05073289 0.48234270 0.21965978
2192 [1] "X.s$v[, 1L:rank]"
2194 [1,] 0.31122175 0.10944725 0.20183340
2195 [2,] -0.29563726 -0.20568893 0.11233366
2197 [1] "1/X.s$d[1L:rank]"
2198 [1] 0.2687056 0.3295320 0.4354170
2200 [1] "diag(1/X.s$d[1L:rank], , rank)"
2202 [1,] 0.2687056 0.000000 0.000000
2203 [2,] 0.0000000 0.329532 0.000000
2204 [3,] 0.0000000 0.000000 0.435417
2207 Xcopy = linear.transpose(v);
2209 cout << "Xcopy = " << endl;
2210 for (int i = 0; i < Xcopy.size(); i++) {
2211 for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
2215 v.clear(); //store "good" columns - X.s$v[, 1L:rank]
2216 v.resize(Xcopy.size()); //[numOTUS]["good" columns]
2217 for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {
2218 for (int i = 0; i < Xcopy.size(); i++) {
2219 v[i].push_back(Xcopy[i][*it]);
2223 vector< vector<double> > diagRanks; diagRanks.resize(rank);
2224 for (int i = 0; i < rank; i++) { diagRanks[i].resize(rank, 0.0); }
2226 for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { diagRanks[count][count] = 1.0 / d[*it]; count++; }
2228 scaling = linear.matrix_mult(linear.matrix_mult(scaling, v), diagRanks); //([numOTUS][numOTUS]*[numOTUS]["good" columns]) = [numOTUS]["good" columns] then ([numOTUS]["good" columns] * ["good" columns]["good" columns] = scaling = [numOTUS]["good" columns]
2230 /*cout << "scaling = " << endl;
2231 for (int i = 0; i < scaling.size(); i++) {
2232 for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
2236 //Note: linear.matrix_mult [1][numGroups] * [numGroups][numOTUs] - columns in first must match rows in second, returns matrix[1][numOTUs]
2237 vector< vector<double> > prior; prior.push_back(proportions);
2238 vector< vector<double> > xbar = linear.matrix_mult(prior, means);
2239 vector<double> xBar = xbar[0]; //length numOTUs
2241 /*cout << "xbar" << endl;
2242 for (int j = 0; j < numOtus; j++) { cout << xBar[j] <<'\t'; }cout << endl;*/
2244 fac = 1 / (double) (numGroups-1);
2245 //scale(group.means, center = xbar, scale = FALSE) %*% scaling
2246 vector< vector<double> > scaledMeans = means; //[numGroups][numOTUs]
2247 for (int i = 0; i < numGroups; i++) {
2248 for (int j = 0; j < numOtus; j++) { scaledMeans[i][j] -= xBar[j]; }
2250 scaledMeans = linear.matrix_mult(scaledMeans, scaling); //[numGroups][numOTUS]*[numOTUS]["good"columns] = [numGroups]["good"columns]
2253 //sqrt((n * prior) * fac)
2254 vector<double> temp = proportions; //[numGroups]
2255 for (int i = 0; i < temp.size(); i++) { temp[i] *= numSampled * fac; temp[i] = sqrt(temp[i]); }
2257 //X <- sqrt((n * prior) * fac) * (scale(group.means, center = xbar, scale = FALSE) %*% scaling)
2258 //X <- temp * scaledMeans
2259 X.clear(); X = scaledMeans; //[numGroups]["good"columns]
2260 for (int i = 0; i < X.size(); i++) {
2261 for (int j = 0; j < X[i].size(); j++) { X[i][j] *= temp[j]; }
2264 cout << "X = " << endl;
2265 for (int i = 0; i < X.size(); i++) {
2266 for (int j = 0; j < X[i].size(); j++) { cout << X[i][j] << '\t'; }
2271 d.clear(); v.clear();
2272 //we want to transpose so results are in Xcopy, but if that makes rows > columns then we don't since svd requires rows < cols.
2274 if (X.size() > X[0].size()) { Xcopy = X; transpose=true; }
2275 else { Xcopy = linear.transpose(X); }
2276 linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below
2277 /*cout << "Xcopy = " << endl;
2278 for (int i = 0; i < Xcopy.size(); i++) {
2279 for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
2283 cout << "v = " << endl;
2284 for (int i = 0; i < v.size(); i++) {
2285 for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; }
2289 cout << "d = " << endl;
2290 for (int i = 0; i < d.size(); i++) { cout << d[i] << endl; }*/
2292 //rank <- sum(X.s$d > tol * X.s$d[1L])
2293 //X.s$d[1L] = larger value in d vector
2294 double largeD = m->max(d);
2295 rank = 0; goodColumns.clear();
2296 for (int i = 0; i < d.size(); i++) { if (d[i] > (0.0000000001*largeD)) { rank++; goodColumns.insert(i); } }
2299 ignore=true;//m->mothurOut("[ERROR]: rank = 0: class means are numerically identical.\n"); m->control_pressed = true;
2302 if (transpose) { Xcopy = linear.transpose(v); }
2303 //scaling <- scaling %*% X.s$v[, 1L:rank] - scaling * "good" columns
2304 v.clear(); //store "good" columns - X.s$v[, 1L:rank]
2305 v.resize(Xcopy.size()); //Xcopy = ["good"columns][numGroups]
2306 for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {
2307 for (int i = 0; i < Xcopy.size(); i++) {
2308 v[i].push_back(Xcopy[i][*it]);
2312 scaling = linear.matrix_mult(scaling, v); //[numOTUS]["good" columns] * ["good"columns][new "good" columns]
2314 /*cout << "scaling = " << endl;
2315 for (int i = 0; i < scaling.size(); i++) {
2316 for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
2322 catch(exception& e) {
2323 m->errorOut(e, "LinearAlgebra", "lda");
2327 /*********************************************************************************************************************************/
2328 //Singular value decomposition (SVD) - adapted from http://svn.lirec.eu/libs/magicsquares/src/SVD.cpp
2330 * svdcomp - SVD decomposition routine.
2331 * Takes an mxn matrix a and decomposes it into udv, where u,v are
2332 * left and right orthogonal transformation matrices, and d is a
2333 * diagonal matrix of singular values.
2335 * This routine is adapted from svdecomp.c in XLISP-STAT 2.1 which is
2336 * code from Numerical Recipes adapted by Luke Tierney and David Betz.
2338 * Input to dsvd is as follows:
2339 * a = mxn matrix to be decomposed, gets overwritten with u
2340 * m = row dimension of a
2341 * n = column dimension of a
2342 * w = returns the vector of singular values of a
2343 * v = returns the right orthogonal transformation matrix
2346 int LinearAlgebra::svd(vector< vector<double> >& a, vector<double>& w, vector< vector<double> >& v) {
2348 int flag, i, its, j, jj, k, l, nm;
2349 double c, f, h, s, x, y, z;
2350 double anorm = 0.0, g = 0.0, scale = 0.0;
2352 int numRows = a.size(); if (numRows == 0) { return 0; }
2353 int numCols = a[0].size();
2354 w.resize(numCols, 0.0);
2355 v.resize(numCols); for (int i = 0; i < numCols; i++) { v[i].resize(numRows, 0.0); }
2357 vector<double> rv1; rv1.resize(numCols, 0.0);
2358 if (numRows < numCols){ m->mothurOut("[ERROR]: numRows < numCols\n"); m->control_pressed = true; return 0; }
2360 /* Householder reduction to bidiagonal form */
2361 for (i = 0; i < numCols; i++)
2363 /* left-hand reduction */
2366 g = s = scale = 0.0;
2369 for (k = i; k < numRows; k++)
2370 scale += fabs((double)a[k][i]);
2373 for (k = i; k < numRows; k++)
2375 a[k][i] = (double)((double)a[k][i]/scale);
2376 s += ((double)a[k][i] * (double)a[k][i]);
2378 f = (double)a[i][i];
2379 g = -SIGN(sqrt(s), f);
2381 a[i][i] = (double)(f - g);
2382 if (i != numCols - 1)
2384 for (j = l; j < numCols; j++)
2386 for (s = 0.0, k = i; k < numRows; k++)
2387 s += ((double)a[k][i] * (double)a[k][j]);
2389 for (k = i; k < numRows; k++)
2390 a[k][j] += (double)(f * (double)a[k][i]);
2393 for (k = i; k < numRows; k++)
2394 a[k][i] = (double)((double)a[k][i]*scale);
2397 w[i] = (double)(scale * g);
2399 /* right-hand reduction */
2400 g = s = scale = 0.0;
2401 if (i < numRows && i != numCols - 1)
2403 for (k = l; k < numCols; k++)
2404 scale += fabs((double)a[i][k]);
2407 for (k = l; k < numCols; k++)
2409 a[i][k] = (double)((double)a[i][k]/scale);
2410 s += ((double)a[i][k] * (double)a[i][k]);
2412 f = (double)a[i][l];
2413 g = -SIGN(sqrt(s), f);
2415 a[i][l] = (double)(f - g);
2416 for (k = l; k < numCols; k++)
2417 rv1[k] = (double)a[i][k] / h;
2418 if (i != numRows - 1)
2420 for (j = l; j < numRows; j++)
2422 for (s = 0.0, k = l; k < numCols; k++)
2423 s += ((double)a[j][k] * (double)a[i][k]);
2424 for (k = l; k < numCols; k++)
2425 a[j][k] += (double)(s * rv1[k]);
2428 for (k = l; k < numCols; k++)
2429 a[i][k] = (double)((double)a[i][k]*scale);
2432 anorm = max(anorm, (fabs((double)w[i]) + fabs(rv1[i])));
2435 /* accumulate the right-hand transformation */
2436 for (i = numCols - 1; i >= 0; i--)
2438 if (i < numCols - 1)
2442 for (j = l; j < numCols; j++)
2443 v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g);
2444 /* double division to avoid underflow */
2445 for (j = l; j < numCols; j++)
2447 for (s = 0.0, k = l; k < numCols; k++)
2448 s += ((double)a[i][k] * (double)v[k][j]);
2449 for (k = l; k < numCols; k++)
2450 v[k][j] += (double)(s * (double)v[k][i]);
2453 for (j = l; j < numCols; j++)
2454 v[i][j] = v[j][i] = 0.0;
2461 /* accumulate the left-hand transformation */
2462 for (i = numCols - 1; i >= 0; i--)
2466 if (i < numCols - 1)
2467 for (j = l; j < numCols; j++)
2472 if (i != numCols - 1)
2474 for (j = l; j < numCols; j++)
2476 for (s = 0.0, k = l; k < numRows; k++)
2477 s += ((double)a[k][i] * (double)a[k][j]);
2478 f = (s / (double)a[i][i]) * g;
2479 for (k = i; k < numRows; k++)
2480 a[k][j] += (double)(f * (double)a[k][i]);
2483 for (j = i; j < numRows; j++)
2484 a[j][i] = (double)((double)a[j][i]*g);
2488 for (j = i; j < numRows; j++)
2494 /* diagonalize the bidiagonal form */
2495 for (k = numCols - 1; k >= 0; k--)
2496 { /* loop over singular values */
2497 for (its = 0; its < 30; its++)
2498 { /* loop over allowed iterations */
2500 for (l = k; l >= 0; l--)
2501 { /* test for splitting */
2503 if (fabs(rv1[l]) + anorm == anorm)
2508 if (fabs((double)w[nm]) + anorm == anorm)
2515 for (i = l; i <= k; i++)
2518 if (fabs(f) + anorm != anorm)
2526 for (j = 0; j < numRows; j++)
2528 y = (double)a[j][nm];
2529 z = (double)a[j][i];
2530 a[j][nm] = (double)(y * c + z * s);
2531 a[j][i] = (double)(z * c - y * s);
2540 { /* make singular value nonnegative */
2541 w[k] = (double)(-z);
2542 for (j = 0; j < numCols; j++)
2543 v[j][k] = (-v[j][k]);
2548 m->mothurOut("No convergence after 30,000! iterations \n"); m->control_pressed = true;
2552 /* shift from bottom 2 x 2 minor */
2558 f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
2560 f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x;
2562 /* next QR transformation */
2564 for (j = l; j <= nm; j++)
2579 for (jj = 0; jj < numCols; jj++)
2581 x = (double)v[jj][j];
2582 z = (double)v[jj][i];
2583 v[jj][j] = (float)(x * c + z * s);
2584 v[jj][i] = (float)(z * c - x * s);
2594 f = (c * g) + (s * y);
2595 x = (c * y) - (s * g);
2596 for (jj = 0; jj < numRows; jj++)
2598 y = (double)a[jj][j];
2599 z = (double)a[jj][i];
2600 a[jj][j] = (double)(y * c + z * s);
2601 a[jj][i] = (double)(z * c - y * s);
2613 catch(exception& e) {
2614 m->errorOut(e, "LinearAlgebra", "svd");