linearalgebra.cpp

   1 /*
   2  *  linearalgebra.cpp
   3  *  mothur
   4  *
   5  *  Created by westcott on 1/7/11.
   6  *  Copyright 2011 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "linearalgebra.h"
  11
  12 /*********************************************************************************************************************************/
  13
  14 inline double SIGN(const double a, const double b)
  15 {
  16     return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
  17 }
  18 /*********************************************************************************************************************************/
  19
  20 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
  21         try {
  22                 vector<vector<double> > product;
  23
  24                 int first_rows = first.size();
  25                 int first_cols = first[0].size();
  26                 int second_cols = second[0].size();
  27
  28                 product.resize(first_rows);
  29                 for(int i=0;i<first_rows;i++){
  30                         product[i].resize(second_cols);
  31                 }
  32
  33                 for(int i=0;i<first_rows;i++){
  34                         for(int j=0;j<second_cols;j++){
  35
  36                                 if (m->control_pressed) { return product; }
  37
  38                                 product[i][j] = 0.0;
  39                                 for(int k=0;k<first_cols;k++){
  40                                         product[i][j] += first[i][k] * second[k][j];
  41                                 }
  42                         }
  43                 }
  44
  45                 return product;
  46         }
  47         catch(exception& e) {
  48                 m->errorOut(e, "LinearAlgebra", "matrix_mult");
  49                 exit(1);
  50         }
  51
  52 }
  53
  54 /*********************************************************************************************************************************/
  55
  56 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
  57
  58 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
  59         try {
  60                 double scale, hh, h, g, f;
  61
  62                 int n = a.size();
  63
  64                 d.resize(n);
  65                 e.resize(n);
  66
  67                 for(int i=n-1;i>0;i--){
  68                         int l=i-1;
  69                         h = scale = 0.0000;
  70                         if(l>0){
  71                                 for(int k=0;k<l+1;k++){
  72                                         scale += fabs(a[i][k]);
  73                                 }
  74                                 if(scale == 0.0){
  75                                         e[i] = a[i][l];
  76                                 }
  77                                 else{
  78                                         for(int k=0;k<l+1;k++){
  79                                                 a[i][k] /= scale;
  80                                                 h += a[i][k] * a[i][k];
  81                                         }
  82                                         f = a[i][l];
  83                                         g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
  84                                         e[i] = scale * g;
  85                                         h -= f * g;
  86                                         a[i][l] = f - g;
  87                                         f = 0.0;
  88                                         for(int j=0;j<l+1;j++){
  89                                                 a[j][i] = a[i][j] / h;
  90                                                 g = 0.0;
  91                                                 for(int k=0;k<j+1;k++){
  92                                                         g += a[j][k] * a[i][k];
  93                                                 }
  94                                                 for(int k=j+1;k<l+1;k++){
  95                                                         g += a[k][j] * a[i][k];
  96                                                 }
  97                                                 e[j] = g / h;
  98                                                 f += e[j] * a[i][j];
  99                                         }
 100                                         hh = f / (h + h);
 101                                         for(int j=0;j<l+1;j++){
 102                                                 f = a[i][j];
 103                                                 e[j] = g = e[j] - hh * f;
 104                                                 for(int k=0;k<j+1;k++){
 105                                                         a[j][k] -= (f * e[k] + g * a[i][k]);
 106                                                 }
 107                                         }
 108                                 }
 109                         }
 110                         else{
 111                                 e[i] = a[i][l];
 112                         }
 113
 114                         d[i] = h;
 115                 }
 116
 117                 d[0] = 0.0000;
 118                 e[0] = 0.0000;
 119
 120                 for(int i=0;i<n;i++){
 121                         int l = i;
 122                         if(d[i] != 0.0){
 123                                 for(int j=0;j<l;j++){
 124                                         g = 0.0000;
 125                                         for(int k=0;k<l;k++){
 126                                                 g += a[i][k] * a[k][j];
 127                                         }
 128                                         for(int k=0;k<l;k++){
 129                                                 a[k][j] -= g * a[k][i];
 130                                         }
 131                                 }
 132                         }
 133                         d[i] = a[i][i];
 134                         a[i][i] = 1.0000;
 135                         for(int j=0;j<l;j++){
 136                                 a[j][i] = a[i][j] = 0.0;
 137                         }
 138                 }
 139
 140                 return 0;
 141         }
 142         catch(exception& e) {
 143                 m->errorOut(e, "LinearAlgebra", "tred2");
 144                 exit(1);
 145         }
 146
 147 }
 148 /*********************************************************************************************************************************/
 149
 150 double LinearAlgebra::pythag(double a, double b)        {       return(pow(a*a+b*b,0.5));       }
 151
 152 /*********************************************************************************************************************************/
 153
 154 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
 155
 156 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
 157         try {
 158                 int myM, i, iter;
 159                 double s, r, p, g, f, dd, c, b;
 160
 161                 int n = d.size();
 162                 for(int i=1;i<=n;i++){
 163                         e[i-1] = e[i];
 164                 }
 165                 e[n-1] = 0.0000;
 166
 167                 for(int l=0;l<n;l++){
 168                         iter = 0;
 169                         do {
 170                                 for(myM=l;myM<n-1;myM++){
 171                                         dd = fabs(d[myM]) + fabs(d[myM+1]);
 172                                         if(fabs(e[myM])+dd == dd) break;
 173                                 }
 174                                 if(myM != l){
 175                                         if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
 176                                         g = (d[l+1]-d[l]) / (2.0 * e[l]);
 177                                         r = pythag(g, 1.0);
 178                                         g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
 179                                         s = c = 1.0;
 180                                         p = 0.0000;
 181                                         for(i=myM-1;i>=l;i--){
 182                                                 f = s * e[i];
 183                                                 b = c * e[i];
 184                                                 e[i+1] = (r=pythag(f,g));
 185                                                 if(r==0.0){
 186                                                         d[i+1] -= p;
 187                                                         e[myM] = 0.0000;
 188                                                         break;
 189                                                 }
 190                                                 s = f / r;
 191                                                 c = g / r;
 192                                                 g = d[i+1] - p;
 193                                                 r = (d[i] - g) * s + 2.0 * c * b;
 194                                                 d[i+1] = g + ( p = s * r);
 195                                                 g = c * r - b;
 196                                                 for(int k=0;k<n;k++){
 197                                                         f = z[k][i+1];
 198                                                         z[k][i+1] = s * z[k][i] + c * f;
 199                                                         z[k][i] = c * z[k][i] - s * f;
 200                                                 }
 201                                         }
 202                                         if(r == 0.00 && i >= l) continue;
 203                                         d[l] -= p;
 204                                         e[l] = g;
 205                                         e[myM] = 0.0;
 206                                 }
 207                         } while (myM != l);
 208                 }
 209
 210                 int k;
 211                 for(int i=0;i<n;i++){
 212                         p=d[k=i];
 213                         for(int j=i;j<n;j++){
 214                                 if(d[j] >= p){
 215                                         p=d[k=j];
 216                                 }
 217                         }
 218                         if(k!=i){
 219                                 d[k]=d[i];
 220                                 d[i]=p;
 221                                 for(int j=0;j<n;j++){
 222                                         p=z[j][i];
 223                                         z[j][i] = z[j][k];
 224                                         z[j][k] = p;
 225                                 }
 226                         }
 227                 }
 228
 229                 return 0;
 230         }
 231         catch(exception& e) {
 232                 m->errorOut(e, "LinearAlgebra", "qtli");
 233                 exit(1);
 234         }
 235 }
 236 /*********************************************************************************************************************************/
 237 //groups by dimension
 238 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
 239         try {
 240                 //make square matrix
 241                 vector< vector<double> > dists; dists.resize(axes.size());
 242                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
 243
 244                 if (dimensions == 1) { //one dimension calc = abs(x-y)
 245
 246                         for (int i = 0; i < dists.size(); i++) {
 247
 248                                 if (m->control_pressed) { return dists; }
 249
 250                                 for (int j = 0; j < i; j++) {
 251                                         dists[i][j] = abs(axes[i][0] - axes[j][0]);
 252                                         dists[j][i] = dists[i][j];
 253                                 }
 254                         }
 255
 256                 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
 257
 258                         for (int i = 0; i < dists.size(); i++) {
 259
 260                                 if (m->control_pressed) { return dists; }
 261
 262                                 for (int j = 0; j < i; j++) {
 263                                         double sum = 0.0;
 264                                         for (int k = 0; k < dimensions; k++) {
 265                                                 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
 266                                         }
 267
 268                                         dists[i][j] = sqrt(sum);
 269                                         dists[j][i] = dists[i][j];
 270                                 }
 271                         }
 272
 273                 }
 274
 275                 return dists;
 276         }
 277         catch(exception& e) {
 278                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
 279                 exit(1);
 280         }
 281 }
 282 /*********************************************************************************************************************************/
 283 //returns groups by dimensions from dimensions by groups
 284 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
 285         try {
 286                 //make square matrix
 287                 vector< vector<double> > dists; dists.resize(axes[0].size());
 288                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
 289
 290                 if (axes.size() == 1) { //one dimension calc = abs(x-y)
 291
 292                         for (int i = 0; i < dists.size(); i++) {
 293
 294                                 if (m->control_pressed) { return dists; }
 295
 296                                 for (int j = 0; j < i; j++) {
 297                                         dists[i][j] = abs(axes[0][i] - axes[0][j]);
 298                                         dists[j][i] = dists[i][j];
 299                                 }
 300                         }
 301
 302                 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
 303
 304                         for (int i = 0; i < dists[0].size(); i++) {
 305
 306                                 if (m->control_pressed) { return dists; }
 307
 308                                 for (int j = 0; j < i; j++) {
 309                                         double sum = 0.0;
 310                                         for (int k = 0; k < axes.size(); k++) {
 311                                                 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
 312                                         }
 313
 314                                         dists[i][j] = sqrt(sum);
 315                                         dists[j][i] = dists[i][j];
 316                                 }
 317                         }
 318
 319                 }
 320
 321                 return dists;
 322         }
 323         catch(exception& e) {
 324                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
 325                 exit(1);
 326         }
 327 }
 328 /*********************************************************************************************************************************/
 329 //assumes both matrices are square and the same size
 330 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
 331         try {
 332
 333                 //find average for - X
 334                 int count = 0;
 335                 float averageEuclid = 0.0;
 336                 for (int i = 0; i < euclidDists.size(); i++) {
 337                         for (int j = 0; j < i; j++) {
 338                                 averageEuclid += euclidDists[i][j];
 339                                 count++;
 340                         }
 341                 }
 342                 averageEuclid = averageEuclid / (float) count;
 343
 344                 //find average for - Y
 345                 count = 0;
 346                 float averageUser = 0.0;
 347                 for (int i = 0; i < userDists.size(); i++) {
 348                         for (int j = 0; j < i; j++) {
 349                                 averageUser += userDists[i][j];
 350                                 count++;
 351                         }
 352                 }
 353                 averageUser = averageUser / (float) count;
 354
 355                 double numerator = 0.0;
 356                 double denomTerm1 = 0.0;
 357                 double denomTerm2 = 0.0;
 358
 359                 for (int i = 0; i < euclidDists.size(); i++) {
 360
 361                         for (int k = 0; k < i; k++) { //just lt dists
 362
 363                                 float Yi = userDists[i][k];
 364                                 float Xi = euclidDists[i][k];
 365
 366                                 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
 367                                 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
 368                                 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
 369                         }
 370                 }
 371
 372                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
 373                 double r = numerator / denom;
 374
 375                 //divide by zero error
 376                 if (isnan(r) || isinf(r)) { r = 0.0; }
 377
 378                 return r;
 379
 380         }
 381         catch(exception& e) {
 382                 m->errorOut(e, "LinearAlgebra", "calcPearson");
 383                 exit(1);
 384         }
 385 }
 386 /*********************************************************************************************************************************/
 387 //assumes both matrices are square and the same size
 388 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
 389         try {
 390                 double r;
 391
 392                 //format data
 393                 map<float, int> tableX;
 394                 map<float, int>::iterator itTable;
 395                 vector<spearmanRank> scores;
 396
 397                 for (int i = 0; i < euclidDists.size(); i++) {
 398                         for (int j = 0; j < i; j++) {
 399                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
 400                                 scores.push_back(member);
 401
 402                                 //count number of repeats
 403                                 itTable = tableX.find(euclidDists[i][j]);
 404                                 if (itTable == tableX.end()) {
 405                                         tableX[euclidDists[i][j]] = 1;
 406                                 }else {
 407                                         tableX[euclidDists[i][j]]++;
 408                                 }
 409                         }
 410                 }
 411
 412                 //sort scores
 413                 sort(scores.begin(), scores.end(), compareSpearman);
 414
 415                 //calc LX
 416                 double Lx = 0.0;
 417                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
 418                         double tx = (double) itTable->second;
 419                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
 420                 }
 421
 422                 //find ranks of xi
 423                 map<string, float> rankEuclid;
 424                 vector<spearmanRank> ties;
 425                 int rankTotal = 0;
 426                 for (int j = 0; j < scores.size(); j++) {
 427                         rankTotal += (j+1);
 428                         ties.push_back(scores[j]);
 429
 430                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
 431                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
 432
 433                                         for (int k = 0; k < ties.size(); k++) {
 434                                                 float thisrank = rankTotal / (float) ties.size();
 435                                                 rankEuclid[ties[k].name] = thisrank;
 436                                         }
 437                                         ties.clear();
 438                                         rankTotal = 0;
 439                                 }
 440                         }else { // you are the last one
 441
 442                                 for (int k = 0; k < ties.size(); k++) {
 443                                         float thisrank = rankTotal / (float) ties.size();
 444                                         rankEuclid[ties[k].name] = thisrank;
 445                                 }
 446                         }
 447                 }
 448
 449
 450                 //format data
 451                 map<float, int> tableY;
 452                 scores.clear();
 453
 454                 for (int i = 0; i < userDists.size(); i++) {
 455                         for (int j = 0; j < i; j++) {
 456                                 spearmanRank member(toString(scores.size()), userDists[i][j]);
 457                                 scores.push_back(member);
 458
 459                                 //count number of repeats
 460                                 itTable = tableY.find(userDists[i][j]);
 461                                 if (itTable == tableY.end()) {
 462                                         tableY[userDists[i][j]] = 1;
 463                                 }else {
 464                                         tableY[userDists[i][j]]++;
 465                                 }
 466                         }
 467                 }
 468
 469                 //sort scores
 470                 sort(scores.begin(), scores.end(), compareSpearman);
 471
 472                 //calc LX
 473                 double Ly = 0.0;
 474                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
 475                         double ty = (double) itTable->second;
 476                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
 477                 }
 478
 479                 //find ranks of yi
 480                 map<string, float> rankUser;
 481                 ties.clear();
 482                 rankTotal = 0;
 483                 for (int j = 0; j < scores.size(); j++) {
 484                         rankTotal += (j+1);
 485                         ties.push_back(scores[j]);
 486
 487                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
 488                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
 489
 490                                         for (int k = 0; k < ties.size(); k++) {
 491                                                 float thisrank = rankTotal / (float) ties.size();
 492                                                 rankUser[ties[k].name] = thisrank;
 493                                         }
 494                                         ties.clear();
 495                                         rankTotal = 0;
 496                                 }
 497                         }else { // you are the last one
 498
 499                                 for (int k = 0; k < ties.size(); k++) {
 500                                         float thisrank = rankTotal / (float) ties.size();
 501                                         rankUser[ties[k].name] = thisrank;
 502                                 }
 503                         }
 504                 }
 505
 506
 507                 double di = 0.0;
 508                 int count = 0;
 509                 for (int i = 0; i < userDists.size(); i++) {
 510                         for (int j = 0; j < i; j++) {
 511
 512                                 float xi = rankEuclid[toString(count)];
 513                                 float yi = rankUser[toString(count)];
 514
 515                                 di += ((xi - yi) * (xi - yi));
 516
 517                                 count++;
 518                         }
 519                 }
 520
 521                 double n = (double) count;
 522
 523                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
 524                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
 525
 526                 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
 527
 528                 //divide by zero error
 529                 if (isnan(r) || isinf(r)) { r = 0.0; }
 530
 531                 return r;
 532
 533         }
 534         catch(exception& e) {
 535                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
 536                 exit(1);
 537         }
 538 }
 539
 540 /*********************************************************************************************************************************/
 541 //assumes both matrices are square and the same size
 542 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
 543         try {
 544                 double r;
 545
 546                 //format data
 547                 vector<spearmanRank> scores;
 548                 for (int i = 0; i < euclidDists.size(); i++) {
 549                         for (int j = 0; j < i; j++) {
 550                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
 551                                 scores.push_back(member);
 552                         }
 553                 }
 554
 555                 //sort scores
 556                 sort(scores.begin(), scores.end(), compareSpearman);
 557
 558                 //find ranks of xi
 559                 map<string, float> rankEuclid;
 560                 vector<spearmanRank> ties;
 561                 int rankTotal = 0;
 562                 for (int j = 0; j < scores.size(); j++) {
 563                         rankTotal += (j+1);
 564                         ties.push_back(scores[j]);
 565
 566                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
 567                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
 568
 569                                         for (int k = 0; k < ties.size(); k++) {
 570                                                 float thisrank = rankTotal / (float) ties.size();
 571                                                 rankEuclid[ties[k].name] = thisrank;
 572                                         }
 573                                         ties.clear();
 574                                         rankTotal = 0;
 575                                 }
 576                         }else { // you are the last one
 577
 578                                 for (int k = 0; k < ties.size(); k++) {
 579                                         float thisrank = rankTotal / (float) ties.size();
 580                                         rankEuclid[ties[k].name] = thisrank;
 581                                 }
 582                         }
 583                 }
 584
 585                 vector<spearmanRank> scoresUser;
 586                 for (int i = 0; i < userDists.size(); i++) {
 587                         for (int j = 0; j < i; j++) {
 588                                 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
 589                                 scoresUser.push_back(member);
 590                         }
 591                 }
 592
 593                 //sort scores
 594                 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);
 595
 596                 //find ranks of yi
 597                 map<string, float> rankUser;
 598                 ties.clear();
 599                 rankTotal = 0;
 600                 for (int j = 0; j < scoresUser.size(); j++) {
 601                         rankTotal += (j+1);
 602                         ties.push_back(scoresUser[j]);
 603
 604                         if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
 605                                 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
 606
 607                                         for (int k = 0; k < ties.size(); k++) {
 608                                                 float thisrank = rankTotal / (float) ties.size();
 609                                                 rankUser[ties[k].name] = thisrank;
 610                                         }
 611                                         ties.clear();
 612                                         rankTotal = 0;
 613                                 }
 614                         }else { // you are the last one
 615
 616                                 for (int k = 0; k < ties.size(); k++) {
 617                                         float thisrank = rankTotal / (float) ties.size();
 618                                         rankUser[ties[k].name] = thisrank;
 619                                 }
 620                         }
 621                 }
 622
 623                 int numCoor = 0;
 624                 int numDisCoor = 0;
 625
 626                 //order user ranks
 627                 vector<spearmanRank> user;
 628                 for (int l = 0; l < scores.size(); l++) {
 629                         spearmanRank member(scores[l].name, rankUser[scores[l].name]);
 630                         user.push_back(member);
 631                 }
 632
 633                 int count = 0;
 634                 for (int l = 0; l < scores.size(); l++) {
 635
 636                         int numWithHigherRank = 0;
 637                         int numWithLowerRank = 0;
 638                         float thisrank = user[l].score;
 639
 640                         for (int u = l+1; u < scores.size(); u++) {
 641                                 if (user[u].score > thisrank) { numWithHigherRank++; }
 642                                 else if (user[u].score < thisrank) { numWithLowerRank++; }
 643                                 count++;
 644                         }
 645
 646                         numCoor += numWithHigherRank;
 647                         numDisCoor += numWithLowerRank;
 648                 }
 649
 650                 r = (numCoor - numDisCoor) / (float) count;
 651
 652                 //divide by zero error
 653                 if (isnan(r) || isinf(r)) { r = 0.0; }
 654
 655                 return r;
 656
 657         }
 658         catch(exception& e) {
 659                 m->errorOut(e, "LinearAlgebra", "calcKendall");
 660                 exit(1);
 661         }
 662 }
 663
 664 /*********************************************************************************************************************************/
 665
 666