linearalgebra.cpp

   1 /*
   2  *  linearalgebra.cpp
   3  *  mothur
   4  *
   5  *  Created by westcott on 1/7/11.
   6  *  Copyright 2011 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "linearalgebra.h"
  11
  12 // This class references functions used from "Numerical Recipes in C++" //
  13 /*********************************************************************************************************************************/
  14
  15 inline double SIGN(const double a, const double b)
  16 {
  17     return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
  18 }
  19 /*********************************************************************************************************************************/
  20
  21 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
  22         try {
  23                 vector<vector<double> > product;
  24
  25                 int first_rows = first.size();
  26                 int first_cols = first[0].size();
  27                 int second_cols = second[0].size();
  28
  29                 product.resize(first_rows);
  30                 for(int i=0;i<first_rows;i++){
  31                         product[i].resize(second_cols);
  32                 }
  33
  34                 for(int i=0;i<first_rows;i++){
  35                         for(int j=0;j<second_cols;j++){
  36
  37                                 if (m->control_pressed) { return product; }
  38
  39                                 product[i][j] = 0.0;
  40                                 for(int k=0;k<first_cols;k++){
  41                                         product[i][j] += first[i][k] * second[k][j];
  42                                 }
  43                         }
  44                 }
  45
  46                 return product;
  47         }
  48         catch(exception& e) {
  49                 m->errorOut(e, "LinearAlgebra", "matrix_mult");
  50                 exit(1);
  51         }
  52
  53 }
  54
  55 /*********************************************************************************************************************************/
  56
  57 void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
  58         try {
  59                 int rank = D.size();
  60
  61                 vector<vector<double> > A(rank);
  62                 vector<vector<double> > C(rank);
  63                 for(int i=0;i<rank;i++){
  64                         A[i].resize(rank);
  65                         C[i].resize(rank);
  66                 }
  67
  68                 double scale = -1.0000 / (double) rank;
  69
  70                 for(int i=0;i<rank;i++){
  71                         A[i][i] = 0.0000;
  72                         C[i][i] = 1.0000 + scale;
  73                         for(int j=i+1;j<rank;j++){
  74                                 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
  75                                 C[i][j] = C[j][i] = scale;
  76                         }
  77                 }
  78
  79                 A = matrix_mult(C,A);
  80                 G = matrix_mult(A,C);
  81         }
  82         catch(exception& e) {
  83                 m->errorOut(e, "LinearAlgebra", "recenter");
  84                 exit(1);
  85         }
  86
  87 }
  88 /*********************************************************************************************************************************/
  89
  90 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
  91
  92 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
  93         try {
  94                 double scale, hh, h, g, f;
  95
  96                 int n = a.size();
  97
  98                 d.resize(n);
  99                 e.resize(n);
 100
 101                 for(int i=n-1;i>0;i--){
 102                         int l=i-1;
 103                         h = scale = 0.0000;
 104                         if(l>0){
 105                                 for(int k=0;k<l+1;k++){
 106                                         scale += fabs(a[i][k]);
 107                                 }
 108                                 if(scale == 0.0){
 109                                         e[i] = a[i][l];
 110                                 }
 111                                 else{
 112                                         for(int k=0;k<l+1;k++){
 113                                                 a[i][k] /= scale;
 114                                                 h += a[i][k] * a[i][k];
 115                                         }
 116                                         f = a[i][l];
 117                                         g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
 118                                         e[i] = scale * g;
 119                                         h -= f * g;
 120                                         a[i][l] = f - g;
 121                                         f = 0.0;
 122                                         for(int j=0;j<l+1;j++){
 123                                                 a[j][i] = a[i][j] / h;
 124                                                 g = 0.0;
 125                                                 for(int k=0;k<j+1;k++){
 126                                                         g += a[j][k] * a[i][k];
 127                                                 }
 128                                                 for(int k=j+1;k<l+1;k++){
 129                                                         g += a[k][j] * a[i][k];
 130                                                 }
 131                                                 e[j] = g / h;
 132                                                 f += e[j] * a[i][j];
 133                                         }
 134                                         hh = f / (h + h);
 135                                         for(int j=0;j<l+1;j++){
 136                                                 f = a[i][j];
 137                                                 e[j] = g = e[j] - hh * f;
 138                                                 for(int k=0;k<j+1;k++){
 139                                                         a[j][k] -= (f * e[k] + g * a[i][k]);
 140                                                 }
 141                                         }
 142                                 }
 143                         }
 144                         else{
 145                                 e[i] = a[i][l];
 146                         }
 147
 148                         d[i] = h;
 149                 }
 150
 151                 d[0] = 0.0000;
 152                 e[0] = 0.0000;
 153
 154                 for(int i=0;i<n;i++){
 155                         int l = i;
 156                         if(d[i] != 0.0){
 157                                 for(int j=0;j<l;j++){
 158                                         g = 0.0000;
 159                                         for(int k=0;k<l;k++){
 160                                                 g += a[i][k] * a[k][j];
 161                                         }
 162                                         for(int k=0;k<l;k++){
 163                                                 a[k][j] -= g * a[k][i];
 164                                         }
 165                                 }
 166                         }
 167                         d[i] = a[i][i];
 168                         a[i][i] = 1.0000;
 169                         for(int j=0;j<l;j++){
 170                                 a[j][i] = a[i][j] = 0.0;
 171                         }
 172                 }
 173
 174                 return 0;
 175         }
 176         catch(exception& e) {
 177                 m->errorOut(e, "LinearAlgebra", "tred2");
 178                 exit(1);
 179         }
 180
 181 }
 182 /*********************************************************************************************************************************/
 183
 184 double LinearAlgebra::pythag(double a, double b)        {       return(pow(a*a+b*b,0.5));       }
 185
 186 /*********************************************************************************************************************************/
 187
 188 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
 189
 190 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
 191         try {
 192                 int myM, i, iter;
 193                 double s, r, p, g, f, dd, c, b;
 194
 195                 int n = d.size();
 196                 for(int i=1;i<=n;i++){
 197                         e[i-1] = e[i];
 198                 }
 199                 e[n-1] = 0.0000;
 200
 201                 for(int l=0;l<n;l++){
 202                         iter = 0;
 203                         do {
 204                                 for(myM=l;myM<n-1;myM++){
 205                                         dd = fabs(d[myM]) + fabs(d[myM+1]);
 206                                         if(fabs(e[myM])+dd == dd) break;
 207                                 }
 208                                 if(myM != l){
 209                                         if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
 210                                         g = (d[l+1]-d[l]) / (2.0 * e[l]);
 211                                         r = pythag(g, 1.0);
 212                                         g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
 213                                         s = c = 1.0;
 214                                         p = 0.0000;
 215                                         for(i=myM-1;i>=l;i--){
 216                                                 f = s * e[i];
 217                                                 b = c * e[i];
 218                                                 e[i+1] = (r=pythag(f,g));
 219                                                 if(r==0.0){
 220                                                         d[i+1] -= p;
 221                                                         e[myM] = 0.0000;
 222                                                         break;
 223                                                 }
 224                                                 s = f / r;
 225                                                 c = g / r;
 226                                                 g = d[i+1] - p;
 227                                                 r = (d[i] - g) * s + 2.0 * c * b;
 228                                                 d[i+1] = g + ( p = s * r);
 229                                                 g = c * r - b;
 230                                                 for(int k=0;k<n;k++){
 231                                                         f = z[k][i+1];
 232                                                         z[k][i+1] = s * z[k][i] + c * f;
 233                                                         z[k][i] = c * z[k][i] - s * f;
 234                                                 }
 235                                         }
 236                                         if(r == 0.00 && i >= l) continue;
 237                                         d[l] -= p;
 238                                         e[l] = g;
 239                                         e[myM] = 0.0;
 240                                 }
 241                         } while (myM != l);
 242                 }
 243
 244                 int k;
 245                 for(int i=0;i<n;i++){
 246                         p=d[k=i];
 247                         for(int j=i;j<n;j++){
 248                                 if(d[j] >= p){
 249                                         p=d[k=j];
 250                                 }
 251                         }
 252                         if(k!=i){
 253                                 d[k]=d[i];
 254                                 d[i]=p;
 255                                 for(int j=0;j<n;j++){
 256                                         p=z[j][i];
 257                                         z[j][i] = z[j][k];
 258                                         z[j][k] = p;
 259                                 }
 260                         }
 261                 }
 262
 263                 return 0;
 264         }
 265         catch(exception& e) {
 266                 m->errorOut(e, "LinearAlgebra", "qtli");
 267                 exit(1);
 268         }
 269 }
 270 /*********************************************************************************************************************************/
 271 //groups by dimension
 272 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
 273         try {
 274                 //make square matrix
 275                 vector< vector<double> > dists; dists.resize(axes.size());
 276                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
 277
 278                 if (dimensions == 1) { //one dimension calc = abs(x-y)
 279
 280                         for (int i = 0; i < dists.size(); i++) {
 281
 282                                 if (m->control_pressed) { return dists; }
 283
 284                                 for (int j = 0; j < i; j++) {
 285                                         dists[i][j] = abs(axes[i][0] - axes[j][0]);
 286                                         dists[j][i] = dists[i][j];
 287                                 }
 288                         }
 289
 290                 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
 291
 292                         for (int i = 0; i < dists.size(); i++) {
 293
 294                                 if (m->control_pressed) { return dists; }
 295
 296                                 for (int j = 0; j < i; j++) {
 297                                         double sum = 0.0;
 298                                         for (int k = 0; k < dimensions; k++) {
 299                                                 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
 300                                         }
 301
 302                                         dists[i][j] = sqrt(sum);
 303                                         dists[j][i] = dists[i][j];
 304                                 }
 305                         }
 306
 307                 }
 308
 309                 return dists;
 310         }
 311         catch(exception& e) {
 312                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
 313                 exit(1);
 314         }
 315 }
 316 /*********************************************************************************************************************************/
 317 //returns groups by dimensions from dimensions by groups
 318 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
 319         try {
 320                 //make square matrix
 321                 vector< vector<double> > dists; dists.resize(axes[0].size());
 322                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
 323
 324                 if (axes.size() == 1) { //one dimension calc = abs(x-y)
 325
 326                         for (int i = 0; i < dists.size(); i++) {
 327
 328                                 if (m->control_pressed) { return dists; }
 329
 330                                 for (int j = 0; j < i; j++) {
 331                                         dists[i][j] = abs(axes[0][i] - axes[0][j]);
 332                                         dists[j][i] = dists[i][j];
 333                                 }
 334                         }
 335
 336                 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
 337
 338                         for (int i = 0; i < dists[0].size(); i++) {
 339
 340                                 if (m->control_pressed) { return dists; }
 341
 342                                 for (int j = 0; j < i; j++) {
 343                                         double sum = 0.0;
 344                                         for (int k = 0; k < axes.size(); k++) {
 345                                                 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
 346                                         }
 347
 348                                         dists[i][j] = sqrt(sum);
 349                                         dists[j][i] = dists[i][j];
 350                                 }
 351                         }
 352
 353                 }
 354
 355                 return dists;
 356         }
 357         catch(exception& e) {
 358                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
 359                 exit(1);
 360         }
 361 }
 362 /*********************************************************************************************************************************/
 363 //assumes both matrices are square and the same size
 364 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
 365         try {
 366
 367                 //find average for - X
 368                 int count = 0;
 369                 float averageEuclid = 0.0;
 370                 for (int i = 0; i < euclidDists.size(); i++) {
 371                         for (int j = 0; j < i; j++) {
 372                                 averageEuclid += euclidDists[i][j];
 373                                 count++;
 374                         }
 375                 }
 376                 averageEuclid = averageEuclid / (float) count;
 377
 378                 //find average for - Y
 379                 count = 0;
 380                 float averageUser = 0.0;
 381                 for (int i = 0; i < userDists.size(); i++) {
 382                         for (int j = 0; j < i; j++) {
 383                                 averageUser += userDists[i][j];
 384                                 count++;
 385                         }
 386                 }
 387                 averageUser = averageUser / (float) count;
 388
 389                 double numerator = 0.0;
 390                 double denomTerm1 = 0.0;
 391                 double denomTerm2 = 0.0;
 392
 393                 for (int i = 0; i < euclidDists.size(); i++) {
 394
 395                         for (int k = 0; k < i; k++) { //just lt dists
 396
 397                                 float Yi = userDists[i][k];
 398                                 float Xi = euclidDists[i][k];
 399
 400                                 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
 401                                 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
 402                                 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
 403                         }
 404                 }
 405
 406                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
 407                 double r = numerator / denom;
 408
 409                 //divide by zero error
 410                 if (isnan(r) || isinf(r)) { r = 0.0; }
 411
 412                 return r;
 413
 414         }
 415         catch(exception& e) {
 416                 m->errorOut(e, "LinearAlgebra", "calcPearson");
 417                 exit(1);
 418         }
 419 }
 420 /*********************************************************************************************************************************/
 421 //assumes both matrices are square and the same size
 422 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
 423         try {
 424                 double r;
 425
 426                 //format data
 427                 map<float, int> tableX;
 428                 map<float, int>::iterator itTable;
 429                 vector<spearmanRank> scores;
 430
 431                 for (int i = 0; i < euclidDists.size(); i++) {
 432                         for (int j = 0; j < i; j++) {
 433                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
 434                                 scores.push_back(member);
 435
 436                                 //count number of repeats
 437                                 itTable = tableX.find(euclidDists[i][j]);
 438                                 if (itTable == tableX.end()) {
 439                                         tableX[euclidDists[i][j]] = 1;
 440                                 }else {
 441                                         tableX[euclidDists[i][j]]++;
 442                                 }
 443                         }
 444                 }
 445
 446                 //sort scores
 447                 sort(scores.begin(), scores.end(), compareSpearman);
 448
 449                 //calc LX
 450                 double Lx = 0.0;
 451                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
 452                         double tx = (double) itTable->second;
 453                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
 454                 }
 455
 456                 //find ranks of xi
 457                 map<string, float> rankEuclid;
 458                 vector<spearmanRank> ties;
 459                 int rankTotal = 0;
 460                 for (int j = 0; j < scores.size(); j++) {
 461                         rankTotal += (j+1);
 462                         ties.push_back(scores[j]);
 463
 464                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
 465                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
 466
 467                                         for (int k = 0; k < ties.size(); k++) {
 468                                                 float thisrank = rankTotal / (float) ties.size();
 469                                                 rankEuclid[ties[k].name] = thisrank;
 470                                         }
 471                                         ties.clear();
 472                                         rankTotal = 0;
 473                                 }
 474                         }else { // you are the last one
 475
 476                                 for (int k = 0; k < ties.size(); k++) {
 477                                         float thisrank = rankTotal / (float) ties.size();
 478                                         rankEuclid[ties[k].name] = thisrank;
 479                                 }
 480                         }
 481                 }
 482
 483
 484                 //format data
 485                 map<float, int> tableY;
 486                 scores.clear();
 487
 488                 for (int i = 0; i < userDists.size(); i++) {
 489                         for (int j = 0; j < i; j++) {
 490                                 spearmanRank member(toString(scores.size()), userDists[i][j]);
 491                                 scores.push_back(member);
 492
 493                                 //count number of repeats
 494                                 itTable = tableY.find(userDists[i][j]);
 495                                 if (itTable == tableY.end()) {
 496                                         tableY[userDists[i][j]] = 1;
 497                                 }else {
 498                                         tableY[userDists[i][j]]++;
 499                                 }
 500                         }
 501                 }
 502
 503                 //sort scores
 504                 sort(scores.begin(), scores.end(), compareSpearman);
 505
 506                 //calc LX
 507                 double Ly = 0.0;
 508                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
 509                         double ty = (double) itTable->second;
 510                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
 511                 }
 512
 513                 //find ranks of yi
 514                 map<string, float> rankUser;
 515                 ties.clear();
 516                 rankTotal = 0;
 517                 for (int j = 0; j < scores.size(); j++) {
 518                         rankTotal += (j+1);
 519                         ties.push_back(scores[j]);
 520
 521                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
 522                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
 523
 524                                         for (int k = 0; k < ties.size(); k++) {
 525                                                 float thisrank = rankTotal / (float) ties.size();
 526                                                 rankUser[ties[k].name] = thisrank;
 527                                         }
 528                                         ties.clear();
 529                                         rankTotal = 0;
 530                                 }
 531                         }else { // you are the last one
 532
 533                                 for (int k = 0; k < ties.size(); k++) {
 534                                         float thisrank = rankTotal / (float) ties.size();
 535                                         rankUser[ties[k].name] = thisrank;
 536                                 }
 537                         }
 538                 }
 539
 540
 541                 double di = 0.0;
 542                 int count = 0;
 543                 for (int i = 0; i < userDists.size(); i++) {
 544                         for (int j = 0; j < i; j++) {
 545
 546                                 float xi = rankEuclid[toString(count)];
 547                                 float yi = rankUser[toString(count)];
 548
 549                                 di += ((xi - yi) * (xi - yi));
 550
 551                                 count++;
 552                         }
 553                 }
 554
 555                 double n = (double) count;
 556
 557                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
 558                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
 559
 560                 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
 561
 562                 //divide by zero error
 563                 if (isnan(r) || isinf(r)) { r = 0.0; }
 564
 565                 return r;
 566
 567         }
 568         catch(exception& e) {
 569                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
 570                 exit(1);
 571         }
 572 }
 573
 574 /*********************************************************************************************************************************/
 575 //assumes both matrices are square and the same size
 576 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
 577         try {
 578                 double r;
 579
 580                 //format data
 581                 vector<spearmanRank> scores;
 582                 for (int i = 0; i < euclidDists.size(); i++) {
 583                         for (int j = 0; j < i; j++) {
 584                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
 585                                 scores.push_back(member);
 586                         }
 587                 }
 588
 589                 //sort scores
 590                 sort(scores.begin(), scores.end(), compareSpearman);
 591
 592                 //find ranks of xi
 593                 map<string, float> rankEuclid;
 594                 vector<spearmanRank> ties;
 595                 int rankTotal = 0;
 596                 for (int j = 0; j < scores.size(); j++) {
 597                         rankTotal += (j+1);
 598                         ties.push_back(scores[j]);
 599
 600                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
 601                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
 602
 603                                         for (int k = 0; k < ties.size(); k++) {
 604                                                 float thisrank = rankTotal / (float) ties.size();
 605                                                 rankEuclid[ties[k].name] = thisrank;
 606                                         }
 607                                         ties.clear();
 608                                         rankTotal = 0;
 609                                 }
 610                         }else { // you are the last one
 611
 612                                 for (int k = 0; k < ties.size(); k++) {
 613                                         float thisrank = rankTotal / (float) ties.size();
 614                                         rankEuclid[ties[k].name] = thisrank;
 615                                 }
 616                         }
 617                 }
 618
 619                 vector<spearmanRank> scoresUser;
 620                 for (int i = 0; i < userDists.size(); i++) {
 621                         for (int j = 0; j < i; j++) {
 622                                 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
 623                                 scoresUser.push_back(member);
 624                         }
 625                 }
 626
 627                 //sort scores
 628                 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);
 629
 630                 //find ranks of yi
 631                 map<string, float> rankUser;
 632                 ties.clear();
 633                 rankTotal = 0;
 634                 for (int j = 0; j < scoresUser.size(); j++) {
 635                         rankTotal += (j+1);
 636                         ties.push_back(scoresUser[j]);
 637
 638                         if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
 639                                 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
 640
 641                                         for (int k = 0; k < ties.size(); k++) {
 642                                                 float thisrank = rankTotal / (float) ties.size();
 643                                                 rankUser[ties[k].name] = thisrank;
 644                                         }
 645                                         ties.clear();
 646                                         rankTotal = 0;
 647                                 }
 648                         }else { // you are the last one
 649
 650                                 for (int k = 0; k < ties.size(); k++) {
 651                                         float thisrank = rankTotal / (float) ties.size();
 652                                         rankUser[ties[k].name] = thisrank;
 653                                 }
 654                         }
 655                 }
 656
 657                 int numCoor = 0;
 658                 int numDisCoor = 0;
 659
 660                 //order user ranks
 661                 vector<spearmanRank> user;
 662                 for (int l = 0; l < scores.size(); l++) {
 663                         spearmanRank member(scores[l].name, rankUser[scores[l].name]);
 664                         user.push_back(member);
 665                 }
 666
 667                 int count = 0;
 668                 for (int l = 0; l < scores.size(); l++) {
 669
 670                         int numWithHigherRank = 0;
 671                         int numWithLowerRank = 0;
 672                         float thisrank = user[l].score;
 673
 674                         for (int u = l+1; u < scores.size(); u++) {
 675                                 if (user[u].score > thisrank) { numWithHigherRank++; }
 676                                 else if (user[u].score < thisrank) { numWithLowerRank++; }
 677                                 count++;
 678                         }
 679
 680                         numCoor += numWithHigherRank;
 681                         numDisCoor += numWithLowerRank;
 682                 }
 683
 684                 r = (numCoor - numDisCoor) / (float) count;
 685
 686                 //divide by zero error
 687                 if (isnan(r) || isinf(r)) { r = 0.0; }
 688
 689                 return r;
 690
 691         }
 692         catch(exception& e) {
 693                 m->errorOut(e, "LinearAlgebra", "calcKendall");
 694                 exit(1);
 695         }
 696 }
 697 /*********************************************************************************************************************************/
 698 double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double& sig){
 699         try {
 700                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
 701
 702                 //format data
 703                 vector<spearmanRank> xscores;
 704                 for (int i = 0; i < x.size(); i++) {
 705                         spearmanRank member(toString(i), x[i]);
 706                         xscores.push_back(member);
 707                 }
 708
 709                 //sort xscores
 710                 sort(xscores.begin(), xscores.end(), compareSpearman);
 711
 712                 //convert scores to ranks of x
 713                 vector<spearmanRank*> ties;
 714                 int rankTotal = 0;
 715                 for (int j = 0; j < xscores.size(); j++) {
 716                         rankTotal += (j+1);
 717                         ties.push_back(&(xscores[j]));
 718
 719                         if (j != xscores.size()-1) { // you are not the last so you can look ahead
 720                                 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
 721                                         for (int k = 0; k < ties.size(); k++) {
 722                                                 float thisrank = rankTotal / (float) ties.size();
 723                                                 (*ties[k]).score = thisrank;
 724                                         }
 725                                         ties.clear();
 726                                         rankTotal = 0;
 727                                 }
 728                         }else { // you are the last one
 729                                 for (int k = 0; k < ties.size(); k++) {
 730                                         float thisrank = rankTotal / (float) ties.size();
 731                                         (*ties[k]).score = thisrank;
 732                                 }
 733                         }
 734                 }
 735
 736
 737                 //format data
 738                 vector<spearmanRank> yscores;
 739                 for (int j = 0; j < y.size(); j++) {
 740                         spearmanRank member(toString(j), y[j]);
 741                         yscores.push_back(member);
 742                 }
 743
 744                 //sort yscores
 745                 sort(yscores.begin(), yscores.end(), compareSpearman);
 746
 747                 //convert to ranks
 748                 map<string, float> rank;
 749                 vector<spearmanRank> yties;
 750                 rankTotal = 0;
 751                 for (int j = 0; j < yscores.size(); j++) {
 752                         rankTotal += (j+1);
 753                         yties.push_back(yscores[j]);
 754
 755                         if (j != yscores.size()-1) { // you are not the last so you can look ahead
 756                                 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
 757                                         for (int k = 0; k < yties.size(); k++) {
 758                                                 float thisrank = rankTotal / (float) yties.size();
 759                                                 rank[yties[k].name] = thisrank;
 760                                         }
 761                                         yties.clear();
 762                                         rankTotal = 0;
 763                                 }
 764                         }else { // you are the last one
 765                                 for (int k = 0; k < yties.size(); k++) {
 766                                         float thisrank = rankTotal / (float) yties.size();
 767                                         rank[yties[k].name] = thisrank;
 768                                 }
 769                         }
 770                 }
 771
 772
 773                 int numCoor = 0;
 774                 int numDisCoor = 0;
 775
 776                 //associate x and y
 777                 vector<spearmanRank> otus;
 778                 for (int l = 0; l < xscores.size(); l++) {
 779                         spearmanRank member(xscores[l].name, rank[xscores[l].name]);
 780                         otus.push_back(member);
 781                 }
 782
 783                 int count = 0;
 784                 for (int l = 0; l < xscores.size(); l++) {
 785
 786                         int numWithHigherRank = 0;
 787                         int numWithLowerRank = 0;
 788                         float thisrank = otus[l].score;
 789
 790                         for (int u = l+1; u < xscores.size(); u++) {
 791                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
 792                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
 793                                 count++;
 794                         }
 795
 796                         numCoor += numWithHigherRank;
 797                         numDisCoor += numWithLowerRank;
 798                 }
 799
 800                 double p = (numCoor - numDisCoor) / (float) count;
 801
 802                 //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
 803                 double numer = 3.0 * (numCoor - numDisCoor);
 804         int n = xscores.size();
 805         double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
 806         denom = sqrt(denom);
 807         sig = numer / denom;
 808
 809                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
 810
 811                 return p;
 812         }
 813         catch(exception& e) {
 814                 m->errorOut(e, "LinearAlgebra", "calcKendall");
 815                 exit(1);
 816         }
 817 }
 818 /*********************************************************************************************************************************/
 819 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
 820         try {
 821                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
 822
 823                 //format data
 824                 map<float, int> tableX;
 825                 map<float, int>::iterator itTable;
 826                 vector<spearmanRank> xscores;
 827
 828                 for (int i = 0; i < x.size(); i++) {
 829                         spearmanRank member(toString(i), x[i]);
 830                         xscores.push_back(member);
 831
 832                         //count number of repeats
 833                         itTable = tableX.find(x[i]);
 834                         if (itTable == tableX.end()) {
 835                                 tableX[x[i]] = 1;
 836                         }else {
 837                                 tableX[x[i]]++;
 838                         }
 839                 }
 840
 841
 842                 //calc LX
 843                 double Lx = 0.0;
 844                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
 845                         double tx = (double) itTable->second;
 846                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
 847                 }
 848
 849
 850                 //sort x
 851                 sort(xscores.begin(), xscores.end(), compareSpearman);
 852
 853                 //convert scores to ranks of x
 854                 //convert to ranks
 855                 map<string, float> rankx;
 856                 vector<spearmanRank> xties;
 857                 int rankTotal = 0;
 858                 for (int j = 0; j < xscores.size(); j++) {
 859                         rankTotal += (j+1);
 860                         xties.push_back(xscores[j]);
 861
 862                         if (j != xscores.size()-1) { // you are not the last so you can look ahead
 863                                 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
 864                                         for (int k = 0; k < xties.size(); k++) {
 865                                                 float thisrank = rankTotal / (float) xties.size();
 866                                                 rankx[xties[k].name] = thisrank;
 867                                         }
 868                                         xties.clear();
 869                                         rankTotal = 0;
 870                                 }
 871                         }else { // you are the last one
 872                                 for (int k = 0; k < xties.size(); k++) {
 873                                         float thisrank = rankTotal / (float) xties.size();
 874                                         rankx[xties[k].name] = thisrank;
 875                                 }
 876                         }
 877                 }
 878
 879                 //format x
 880                 vector<spearmanRank> yscores;
 881                 map<float, int> tableY;
 882                 for (int j = 0; j < y.size(); j++) {
 883                         spearmanRank member(toString(j), y[j]);
 884                         yscores.push_back(member);
 885
 886                         itTable = tableY.find(member.score);
 887                         if (itTable == tableY.end()) {
 888                                 tableY[member.score] = 1;
 889                         }else {
 890                                 tableY[member.score]++;
 891                         }
 892
 893                 }
 894
 895                 //calc Ly
 896                 double Ly = 0.0;
 897                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
 898                         double ty = (double) itTable->second;
 899                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
 900                 }
 901
 902                 sort(yscores.begin(), yscores.end(), compareSpearman);
 903
 904                 //convert to ranks
 905                 map<string, float> rank;
 906                 vector<spearmanRank> yties;
 907                 rankTotal = 0;
 908                 for (int j = 0; j < yscores.size(); j++) {
 909                         rankTotal += (j+1);
 910                         yties.push_back(yscores[j]);
 911
 912                         if (j != yscores.size()-1) { // you are not the last so you can look ahead
 913                                 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
 914                                         for (int k = 0; k < yties.size(); k++) {
 915                                                 float thisrank = rankTotal / (float) yties.size();
 916                                                 rank[yties[k].name] = thisrank;
 917                                         }
 918                                         yties.clear();
 919                                         rankTotal = 0;
 920                                 }
 921                         }else { // you are the last one
 922                                 for (int k = 0; k < yties.size(); k++) {
 923                                         float thisrank = rankTotal / (float) yties.size();
 924                                         rank[yties[k].name] = thisrank;
 925                                 }
 926                         }
 927                 }
 928
 929                 double di = 0.0;
 930                 for (int k = 0; k < x.size(); k++) {
 931
 932                         float xi = rankx[toString(k)];
 933                         float yi = rank[toString(k)];
 934
 935                         di += ((xi - yi) * (xi - yi));
 936                 }
 937
 938                 double p = 0.0;
 939
 940                 double n = (double) x.size();
 941                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
 942                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
 943
 944                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
 945
 946                 //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
 947                 double temp = (x.size()-2) / (double) (1- (p*p));
 948                 temp = sqrt(temp);
 949                 sig = p*temp;
 950                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
 951
 952                 return p;
 953         }
 954         catch(exception& e) {
 955                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
 956                 exit(1);
 957         }
 958 }
 959 /*********************************************************************************************************************************/
 960 double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double& sig){
 961         try {
 962                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
 963
 964                 //find average X
 965                 float averageX = 0.0;
 966                 for (int i = 0; i < x.size(); i++) { averageX += x[i];  }
 967                 averageX = averageX / (float) x.size();
 968
 969                 //find average Y
 970                 float sumY = 0.0;
 971                 for (int j = 0; j < y.size(); j++) { sumY += y[j]; }
 972                 float Ybar = sumY / (float) y.size();
 973
 974                 double r = 0.0;
 975                 double numerator = 0.0;
 976                 double denomTerm1 = 0.0;
 977                 double denomTerm2 = 0.0;
 978
 979                 for (int j = 0; j < x.size(); j++) {
 980                         float Yi = y[j];
 981                         float Xi = x[j];
 982
 983                         numerator += ((Xi - averageX) * (Yi - Ybar));
 984                         denomTerm1 += ((Xi - averageX) * (Xi - averageX));
 985                         denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
 986                 }
 987
 988                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
 989
 990                 r = numerator / denom;
 991
 992                 //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
 993                 double temp =  (1- (r*r)) / (double) (x.size()-2);
 994                 temp = sqrt(temp);
 995                 sig = r / temp;
 996                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
 997
 998                 return r;
 999         }
1000         catch(exception& e) {
1001                 m->errorOut(e, "LinearAlgebra", "calcPearson");
1002                 exit(1);
1003         }
1004 }
1005 /*********************************************************************************************************************************/
1006
1007 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
1008
1009         int numSamples = relAbundData.size();
1010         int numOTUs = relAbundData[0].size();
1011
1012         vector<vector<double> > dMatrix(numSamples);
1013         for(int i=0;i<numSamples;i++){
1014                 dMatrix[i].resize(numSamples);
1015         }
1016
1017         for(int i=0;i<numSamples;i++){
1018                 for(int j=0;j<numSamples;j++){
1019
1020                         double d = 0;
1021                         for(int k=0;k<numOTUs;k++){
1022                                 d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
1023                         }
1024                         dMatrix[i][j] = pow(d, 0.50000);
1025                         dMatrix[j][i] = dMatrix[i][j];
1026
1027                 }
1028         }
1029         return dMatrix;
1030
1031 }
1032
1033 /*********************************************************************************************************************************/