]> git.donarmstrong.com Git - mothur.git/blob - linearalgebra.cpp
working on pca
[mothur.git] / linearalgebra.cpp
1 /*
2  *  linearalgebra.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "linearalgebra.h"
11
12 /*********************************************************************************************************************************/
13
14 inline double SIGN(const double a, const double b)
15 {
16     return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
17 }
18 /*********************************************************************************************************************************/
19
20 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
21         try {
22                 vector<vector<double> > product;
23                 
24                 int first_rows = first.size();
25                 int first_cols = first[0].size();
26                 int second_cols = second[0].size();
27                 
28                 product.resize(first_rows);
29                 for(int i=0;i<first_rows;i++){
30                         product[i].resize(second_cols);
31                 }
32                 
33                 for(int i=0;i<first_rows;i++){
34                         for(int j=0;j<second_cols;j++){
35                                 
36                                 if (m->control_pressed) { return product; }
37                                         
38                                 product[i][j] = 0.0;
39                                 for(int k=0;k<first_cols;k++){
40                                         product[i][j] += first[i][k] * second[k][j];
41                                 }
42                         }
43                 }
44                 
45                 return product;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "LinearAlgebra", "matrix_mult");
49                 exit(1);
50         }
51         
52 }
53
54 /*********************************************************************************************************************************/
55
56 void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
57         try {
58                 int rank = D.size();
59                 
60                 vector<vector<double> > A(rank);
61                 vector<vector<double> > C(rank);
62                 for(int i=0;i<rank;i++){
63                         A[i].resize(rank);
64                         C[i].resize(rank);
65                 }
66                 
67                 double scale = -1.0000 / (double) rank;
68                 
69                 for(int i=0;i<rank;i++){
70                         A[i][i] = 0.0000;
71                         C[i][i] = 1.0000 + scale;
72                         for(int j=i+1;j<rank;j++){
73                                 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
74                                 C[i][j] = C[j][i] = scale;
75                         }
76                 }
77                 
78                 A = matrix_mult(C,A);
79                 G = matrix_mult(A,C);
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "LinearAlgebra", "recenter");
83                 exit(1);
84         }
85         
86 }
87 /*********************************************************************************************************************************/
88
89 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
90
91 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
92         try {
93                 double scale, hh, h, g, f;
94                 
95                 int n = a.size();
96                 
97                 d.resize(n);
98                 e.resize(n);
99                 
100                 for(int i=n-1;i>0;i--){
101                         int l=i-1;
102                         h = scale = 0.0000;
103                         if(l>0){
104                                 for(int k=0;k<l+1;k++){
105                                         scale += fabs(a[i][k]);
106                                 }
107                                 if(scale == 0.0){
108                                         e[i] = a[i][l];
109                                 }
110                                 else{
111                                         for(int k=0;k<l+1;k++){
112                                                 a[i][k] /= scale;
113                                                 h += a[i][k] * a[i][k];
114                                         }
115                                         f = a[i][l];
116                                         g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
117                                         e[i] = scale * g;
118                                         h -= f * g;
119                                         a[i][l] = f - g;
120                                         f = 0.0;
121                                         for(int j=0;j<l+1;j++){
122                                                 a[j][i] = a[i][j] / h;
123                                                 g = 0.0;
124                                                 for(int k=0;k<j+1;k++){
125                                                         g += a[j][k] * a[i][k];
126                                                 }
127                                                 for(int k=j+1;k<l+1;k++){
128                                                         g += a[k][j] * a[i][k];
129                                                 }
130                                                 e[j] = g / h;
131                                                 f += e[j] * a[i][j];
132                                         }
133                                         hh = f / (h + h);
134                                         for(int j=0;j<l+1;j++){
135                                                 f = a[i][j];
136                                                 e[j] = g = e[j] - hh * f;
137                                                 for(int k=0;k<j+1;k++){
138                                                         a[j][k] -= (f * e[k] + g * a[i][k]);
139                                                 }
140                                         }
141                                 }
142                         }
143                         else{
144                                 e[i] = a[i][l];
145                         }
146                         
147                         d[i] = h;
148                 }
149                 
150                 d[0] = 0.0000;
151                 e[0] = 0.0000;
152                 
153                 for(int i=0;i<n;i++){
154                         int l = i;
155                         if(d[i] != 0.0){
156                                 for(int j=0;j<l;j++){
157                                         g = 0.0000;
158                                         for(int k=0;k<l;k++){
159                                                 g += a[i][k] * a[k][j];
160                                         }
161                                         for(int k=0;k<l;k++){
162                                                 a[k][j] -= g * a[k][i];
163                                         }
164                                 }
165                         }
166                         d[i] = a[i][i];
167                         a[i][i] = 1.0000;
168                         for(int j=0;j<l;j++){
169                                 a[j][i] = a[i][j] = 0.0;
170                         }
171                 }
172                 
173                 return 0;
174         }
175         catch(exception& e) {
176                 m->errorOut(e, "LinearAlgebra", "tred2");
177                 exit(1);
178         }
179         
180 }
181 /*********************************************************************************************************************************/
182
183 double LinearAlgebra::pythag(double a, double b)        {       return(pow(a*a+b*b,0.5));       }
184
185 /*********************************************************************************************************************************/
186
187 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
188
189 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
190         try {
191                 int myM, i, iter;
192                 double s, r, p, g, f, dd, c, b;
193                 
194                 int n = d.size();
195                 for(int i=1;i<=n;i++){
196                         e[i-1] = e[i];
197                 }
198                 e[n-1] = 0.0000;
199                 
200                 for(int l=0;l<n;l++){
201                         iter = 0;
202                         do {
203                                 for(myM=l;myM<n-1;myM++){
204                                         dd = fabs(d[myM]) + fabs(d[myM+1]);
205                                         if(fabs(e[myM])+dd == dd) break;
206                                 }
207                                 if(myM != l){
208                                         if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
209                                         g = (d[l+1]-d[l]) / (2.0 * e[l]);
210                                         r = pythag(g, 1.0);
211                                         g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
212                                         s = c = 1.0;
213                                         p = 0.0000;
214                                         for(i=myM-1;i>=l;i--){
215                                                 f = s * e[i];
216                                                 b = c * e[i];
217                                                 e[i+1] = (r=pythag(f,g));
218                                                 if(r==0.0){
219                                                         d[i+1] -= p;
220                                                         e[myM] = 0.0000;
221                                                         break;
222                                                 }
223                                                 s = f / r;
224                                                 c = g / r;
225                                                 g = d[i+1] - p;
226                                                 r = (d[i] - g) * s + 2.0 * c * b;
227                                                 d[i+1] = g + ( p = s * r);
228                                                 g = c * r - b;
229                                                 for(int k=0;k<n;k++){
230                                                         f = z[k][i+1];
231                                                         z[k][i+1] = s * z[k][i] + c * f;
232                                                         z[k][i] = c * z[k][i] - s * f;
233                                                 }
234                                         }
235                                         if(r == 0.00 && i >= l) continue;
236                                         d[l] -= p;
237                                         e[l] = g;
238                                         e[myM] = 0.0;
239                                 }
240                         } while (myM != l);
241                 }
242                 
243                 int k;
244                 for(int i=0;i<n;i++){
245                         p=d[k=i];
246                         for(int j=i;j<n;j++){
247                                 if(d[j] >= p){
248                                         p=d[k=j];
249                                 }
250                         }
251                         if(k!=i){
252                                 d[k]=d[i];
253                                 d[i]=p;
254                                 for(int j=0;j<n;j++){
255                                         p=z[j][i];
256                                         z[j][i] = z[j][k];
257                                         z[j][k] = p;
258                                 }
259                         }
260                 }
261                 
262                 return 0;
263         }
264         catch(exception& e) {
265                 m->errorOut(e, "LinearAlgebra", "qtli");
266                 exit(1);
267         }
268 }
269 /*********************************************************************************************************************************/
270 //groups by dimension
271 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
272         try {
273                 //make square matrix
274                 vector< vector<double> > dists; dists.resize(axes.size());
275                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
276                 
277                 if (dimensions == 1) { //one dimension calc = abs(x-y)
278                         
279                         for (int i = 0; i < dists.size(); i++) {
280                                 
281                                 if (m->control_pressed) { return dists; }
282                                 
283                                 for (int j = 0; j < i; j++) {
284                                         dists[i][j] = abs(axes[i][0] - axes[j][0]);
285                                         dists[j][i] = dists[i][j];
286                                 }
287                         }
288                         
289                 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
290                         
291                         for (int i = 0; i < dists.size(); i++) {
292                                 
293                                 if (m->control_pressed) { return dists; }
294                                 
295                                 for (int j = 0; j < i; j++) {
296                                         double sum = 0.0;
297                                         for (int k = 0; k < dimensions; k++) {
298                                                 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
299                                         }
300                                         
301                                         dists[i][j] = sqrt(sum);
302                                         dists[j][i] = dists[i][j];
303                                 }
304                         }
305                         
306                 }
307                 
308                 return dists;
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
312                 exit(1);
313         }
314 }
315 /*********************************************************************************************************************************/
316 //returns groups by dimensions from dimensions by groups
317 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
318         try {
319                 //make square matrix
320                 vector< vector<double> > dists; dists.resize(axes[0].size());
321                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
322                 
323                 if (axes.size() == 1) { //one dimension calc = abs(x-y)
324                         
325                         for (int i = 0; i < dists.size(); i++) {
326                                 
327                                 if (m->control_pressed) { return dists; }
328                                 
329                                 for (int j = 0; j < i; j++) {
330                                         dists[i][j] = abs(axes[0][i] - axes[0][j]);
331                                         dists[j][i] = dists[i][j];
332                                 }
333                         }
334                         
335                 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
336                         
337                         for (int i = 0; i < dists[0].size(); i++) {
338                                 
339                                 if (m->control_pressed) { return dists; }
340                                 
341                                 for (int j = 0; j < i; j++) {
342                                         double sum = 0.0;
343                                         for (int k = 0; k < axes.size(); k++) {
344                                                 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
345                                         }
346                                         
347                                         dists[i][j] = sqrt(sum);
348                                         dists[j][i] = dists[i][j];
349                                 }
350                         }
351                         
352                 }
353                 
354                 return dists;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
358                 exit(1);
359         }
360 }
361 /*********************************************************************************************************************************/
362 //assumes both matrices are square and the same size
363 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
364         try {
365                 
366                 //find average for - X
367                 int count = 0;
368                 float averageEuclid = 0.0; 
369                 for (int i = 0; i < euclidDists.size(); i++) {
370                         for (int j = 0; j < i; j++) {
371                                 averageEuclid += euclidDists[i][j];  
372                                 count++;
373                         }
374                 }
375                 averageEuclid = averageEuclid / (float) count;   
376                         
377                 //find average for - Y
378                 count = 0;
379                 float averageUser = 0.0; 
380                 for (int i = 0; i < userDists.size(); i++) {
381                         for (int j = 0; j < i; j++) {
382                                 averageUser += userDists[i][j]; 
383                                 count++;
384                         }
385                 }
386                 averageUser = averageUser / (float) count;  
387
388                 double numerator = 0.0;
389                 double denomTerm1 = 0.0;
390                 double denomTerm2 = 0.0;
391                 
392                 for (int i = 0; i < euclidDists.size(); i++) {
393                         
394                         for (int k = 0; k < i; k++) { //just lt dists
395                                 
396                                 float Yi = userDists[i][k];
397                                 float Xi = euclidDists[i][k];
398                                 
399                                 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
400                                 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
401                                 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
402                         }
403                 }
404                 
405                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
406                 double r = numerator / denom;
407                 
408                 //divide by zero error
409                 if (isnan(r) || isinf(r)) { r = 0.0; }
410                 
411                 return r;
412                 
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "LinearAlgebra", "calcPearson");
416                 exit(1);
417         }
418 }
419 /*********************************************************************************************************************************/
420 //assumes both matrices are square and the same size
421 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
422         try {
423                 double r; 
424                 
425                 //format data
426                 map<float, int> tableX; 
427                 map<float, int>::iterator itTable;
428                 vector<spearmanRank> scores; 
429                 
430                 for (int i = 0; i < euclidDists.size(); i++) {
431                         for (int j = 0; j < i; j++) {
432                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
433                                 scores.push_back(member);  
434                                 
435                                 //count number of repeats
436                                 itTable = tableX.find(euclidDists[i][j]);
437                                 if (itTable == tableX.end()) { 
438                                         tableX[euclidDists[i][j]] = 1;
439                                 }else {
440                                         tableX[euclidDists[i][j]]++;
441                                 }
442                         }
443                 }
444                 
445                 //sort scores
446                 sort(scores.begin(), scores.end(), compareSpearman); 
447
448                 //calc LX
449                 double Lx = 0.0; 
450                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
451                         double tx = (double) itTable->second;
452                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
453                 }
454                 
455                 //find ranks of xi
456                 map<string, float> rankEuclid;
457                 vector<spearmanRank> ties;
458                 int rankTotal = 0;
459                 for (int j = 0; j < scores.size(); j++) {
460                         rankTotal += (j+1);
461                         ties.push_back(scores[j]);
462                         
463                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
464                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
465                                         
466                                         for (int k = 0; k < ties.size(); k++) {
467                                                 float thisrank = rankTotal / (float) ties.size();
468                                                 rankEuclid[ties[k].name] = thisrank;
469                                         }
470                                         ties.clear();
471                                         rankTotal = 0;
472                                 }
473                         }else { // you are the last one
474                                 
475                                 for (int k = 0; k < ties.size(); k++) {
476                                         float thisrank = rankTotal / (float) ties.size();
477                                         rankEuclid[ties[k].name] = thisrank;
478                                 }
479                         }
480                 }
481                 
482                 
483                 //format data
484                 map<float, int> tableY; 
485                 scores.clear(); 
486                 
487                 for (int i = 0; i < userDists.size(); i++) {
488                         for (int j = 0; j < i; j++) {
489                                 spearmanRank member(toString(scores.size()), userDists[i][j]);
490                                 scores.push_back(member);  
491                                 
492                                 //count number of repeats
493                                 itTable = tableY.find(userDists[i][j]);
494                                 if (itTable == tableY.end()) { 
495                                         tableY[userDists[i][j]] = 1;
496                                 }else {
497                                         tableY[userDists[i][j]]++;
498                                 }
499                         }
500                 }
501                 
502                 //sort scores
503                 sort(scores.begin(), scores.end(), compareSpearman); 
504                 
505                 //calc LX
506                 double Ly = 0.0; 
507                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
508                         double ty = (double) itTable->second;
509                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
510                 }
511                 
512                 //find ranks of yi
513                 map<string, float> rankUser;
514                 ties.clear();
515                 rankTotal = 0;
516                 for (int j = 0; j < scores.size(); j++) {
517                         rankTotal += (j+1);
518                         ties.push_back(scores[j]);
519                         
520                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
521                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
522                                         
523                                         for (int k = 0; k < ties.size(); k++) {
524                                                 float thisrank = rankTotal / (float) ties.size();
525                                                 rankUser[ties[k].name] = thisrank;
526                                         }
527                                         ties.clear();
528                                         rankTotal = 0;
529                                 }
530                         }else { // you are the last one
531                                 
532                                 for (int k = 0; k < ties.size(); k++) {
533                                         float thisrank = rankTotal / (float) ties.size();
534                                         rankUser[ties[k].name] = thisrank;
535                                 }
536                         }
537                 }
538                 
539                         
540                 double di = 0.0;        
541                 int count = 0;
542                 for (int i = 0; i < userDists.size(); i++) {
543                         for (int j = 0; j < i; j++) {
544                         
545                                 float xi = rankEuclid[toString(count)];
546                                 float yi = rankUser[toString(count)];
547                         
548                                 di += ((xi - yi) * (xi - yi));
549                                 
550                                 count++;
551                         }
552                 }
553                 
554                 double n = (double) count;
555                 
556                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
557                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
558                 
559                 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
560                 
561                 //divide by zero error
562                 if (isnan(r) || isinf(r)) { r = 0.0; }
563         
564                 return r;
565                 
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
569                 exit(1);
570         }
571 }
572
573 /*********************************************************************************************************************************/
574 //assumes both matrices are square and the same size
575 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
576         try {
577                 double r;
578                 
579                 //format data
580                 vector<spearmanRank> scores; 
581                 for (int i = 0; i < euclidDists.size(); i++) {
582                         for (int j = 0; j < i; j++) {
583                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
584                                 scores.push_back(member);
585                         }
586                 }
587                         
588                 //sort scores
589                 sort(scores.begin(), scores.end(), compareSpearman);    
590                 
591                 //find ranks of xi
592                 map<string, float> rankEuclid;
593                 vector<spearmanRank> ties;
594                 int rankTotal = 0;
595                 for (int j = 0; j < scores.size(); j++) {
596                         rankTotal += (j+1);
597                         ties.push_back(scores[j]);
598                         
599                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
600                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
601                                         
602                                         for (int k = 0; k < ties.size(); k++) {
603                                                 float thisrank = rankTotal / (float) ties.size();
604                                                 rankEuclid[ties[k].name] = thisrank;
605                                         }
606                                         ties.clear();
607                                         rankTotal = 0;
608                                 }
609                         }else { // you are the last one
610                                 
611                                 for (int k = 0; k < ties.size(); k++) {
612                                         float thisrank = rankTotal / (float) ties.size();
613                                         rankEuclid[ties[k].name] = thisrank;
614                                 }
615                         }
616                 }
617                 
618                 vector<spearmanRank> scoresUser; 
619                 for (int i = 0; i < userDists.size(); i++) {
620                         for (int j = 0; j < i; j++) {
621                                 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
622                                 scoresUser.push_back(member);  
623                         }
624                 }
625                 
626                 //sort scores
627                 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);    
628                 
629                 //find ranks of yi
630                 map<string, float> rankUser;
631                 ties.clear();
632                 rankTotal = 0;
633                 for (int j = 0; j < scoresUser.size(); j++) {
634                         rankTotal += (j+1);
635                         ties.push_back(scoresUser[j]);
636                         
637                         if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
638                                 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
639                                         
640                                         for (int k = 0; k < ties.size(); k++) {
641                                                 float thisrank = rankTotal / (float) ties.size();
642                                                 rankUser[ties[k].name] = thisrank;
643                                         }
644                                         ties.clear();
645                                         rankTotal = 0;
646                                 }
647                         }else { // you are the last one
648                                 
649                                 for (int k = 0; k < ties.size(); k++) {
650                                         float thisrank = rankTotal / (float) ties.size();
651                                         rankUser[ties[k].name] = thisrank;
652                                 }
653                         }
654                 }
655                 
656                 int numCoor = 0;
657                 int numDisCoor = 0;
658                 
659                 //order user ranks
660                 vector<spearmanRank> user; 
661                 for (int l = 0; l < scores.size(); l++) {   
662                         spearmanRank member(scores[l].name, rankUser[scores[l].name]);
663                         user.push_back(member);
664                 }
665                                 
666                 int count = 0;
667                 for (int l = 0; l < scores.size(); l++) {
668                                         
669                         int numWithHigherRank = 0;
670                         int numWithLowerRank = 0;
671                         float thisrank = user[l].score;
672                                         
673                         for (int u = l+1; u < scores.size(); u++) {
674                                 if (user[u].score > thisrank) { numWithHigherRank++; }
675                                 else if (user[u].score < thisrank) { numWithLowerRank++; }
676                                 count++;
677                         }
678                                         
679                         numCoor += numWithHigherRank;
680                         numDisCoor += numWithLowerRank;
681                 }
682                                 
683                 r = (numCoor - numDisCoor) / (float) count;
684                 
685                 //divide by zero error
686                 if (isnan(r) || isinf(r)) { r = 0.0; }
687                 
688                 return r;
689                 
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "LinearAlgebra", "calcKendall");
693                 exit(1);
694         }
695 }
696
697 /*********************************************************************************************************************************/
698
699