]> git.donarmstrong.com Git - mothur.git/blob - linearalgebra.cpp
added otu.association command. added calcSpearman, calcKendall and calcPearson functi...
[mothur.git] / linearalgebra.cpp
1 /*
2  *  linearalgebra.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "linearalgebra.h"
11
12 /*********************************************************************************************************************************/
13
14 inline double SIGN(const double a, const double b)
15 {
16     return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
17 }
18 /*********************************************************************************************************************************/
19
20 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
21         try {
22                 vector<vector<double> > product;
23                 
24                 int first_rows = first.size();
25                 int first_cols = first[0].size();
26                 int second_cols = second[0].size();
27                 
28                 product.resize(first_rows);
29                 for(int i=0;i<first_rows;i++){
30                         product[i].resize(second_cols);
31                 }
32                 
33                 for(int i=0;i<first_rows;i++){
34                         for(int j=0;j<second_cols;j++){
35                                 
36                                 if (m->control_pressed) { return product; }
37                                         
38                                 product[i][j] = 0.0;
39                                 for(int k=0;k<first_cols;k++){
40                                         product[i][j] += first[i][k] * second[k][j];
41                                 }
42                         }
43                 }
44                 
45                 return product;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "LinearAlgebra", "matrix_mult");
49                 exit(1);
50         }
51         
52 }
53
54 /*********************************************************************************************************************************/
55
56 void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
57         try {
58                 int rank = D.size();
59                 
60                 vector<vector<double> > A(rank);
61                 vector<vector<double> > C(rank);
62                 for(int i=0;i<rank;i++){
63                         A[i].resize(rank);
64                         C[i].resize(rank);
65                 }
66                 
67                 double scale = -1.0000 / (double) rank;
68                 
69                 for(int i=0;i<rank;i++){
70                         A[i][i] = 0.0000;
71                         C[i][i] = 1.0000 + scale;
72                         for(int j=i+1;j<rank;j++){
73                                 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
74                                 C[i][j] = C[j][i] = scale;
75                         }
76                 }
77                 
78                 A = matrix_mult(C,A);
79                 G = matrix_mult(A,C);
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "LinearAlgebra", "recenter");
83                 exit(1);
84         }
85         
86 }
87 /*********************************************************************************************************************************/
88
89 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
90
91 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
92         try {
93                 double scale, hh, h, g, f;
94                 
95                 int n = a.size();
96                 
97                 d.resize(n);
98                 e.resize(n);
99                 
100                 for(int i=n-1;i>0;i--){
101                         int l=i-1;
102                         h = scale = 0.0000;
103                         if(l>0){
104                                 for(int k=0;k<l+1;k++){
105                                         scale += fabs(a[i][k]);
106                                 }
107                                 if(scale == 0.0){
108                                         e[i] = a[i][l];
109                                 }
110                                 else{
111                                         for(int k=0;k<l+1;k++){
112                                                 a[i][k] /= scale;
113                                                 h += a[i][k] * a[i][k];
114                                         }
115                                         f = a[i][l];
116                                         g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
117                                         e[i] = scale * g;
118                                         h -= f * g;
119                                         a[i][l] = f - g;
120                                         f = 0.0;
121                                         for(int j=0;j<l+1;j++){
122                                                 a[j][i] = a[i][j] / h;
123                                                 g = 0.0;
124                                                 for(int k=0;k<j+1;k++){
125                                                         g += a[j][k] * a[i][k];
126                                                 }
127                                                 for(int k=j+1;k<l+1;k++){
128                                                         g += a[k][j] * a[i][k];
129                                                 }
130                                                 e[j] = g / h;
131                                                 f += e[j] * a[i][j];
132                                         }
133                                         hh = f / (h + h);
134                                         for(int j=0;j<l+1;j++){
135                                                 f = a[i][j];
136                                                 e[j] = g = e[j] - hh * f;
137                                                 for(int k=0;k<j+1;k++){
138                                                         a[j][k] -= (f * e[k] + g * a[i][k]);
139                                                 }
140                                         }
141                                 }
142                         }
143                         else{
144                                 e[i] = a[i][l];
145                         }
146                         
147                         d[i] = h;
148                 }
149                 
150                 d[0] = 0.0000;
151                 e[0] = 0.0000;
152                 
153                 for(int i=0;i<n;i++){
154                         int l = i;
155                         if(d[i] != 0.0){
156                                 for(int j=0;j<l;j++){
157                                         g = 0.0000;
158                                         for(int k=0;k<l;k++){
159                                                 g += a[i][k] * a[k][j];
160                                         }
161                                         for(int k=0;k<l;k++){
162                                                 a[k][j] -= g * a[k][i];
163                                         }
164                                 }
165                         }
166                         d[i] = a[i][i];
167                         a[i][i] = 1.0000;
168                         for(int j=0;j<l;j++){
169                                 a[j][i] = a[i][j] = 0.0;
170                         }
171                 }
172                 
173                 return 0;
174         }
175         catch(exception& e) {
176                 m->errorOut(e, "LinearAlgebra", "tred2");
177                 exit(1);
178         }
179         
180 }
181 /*********************************************************************************************************************************/
182
183 double LinearAlgebra::pythag(double a, double b)        {       return(pow(a*a+b*b,0.5));       }
184
185 /*********************************************************************************************************************************/
186
187 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
188
189 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
190         try {
191                 int myM, i, iter;
192                 double s, r, p, g, f, dd, c, b;
193                 
194                 int n = d.size();
195                 for(int i=1;i<=n;i++){
196                         e[i-1] = e[i];
197                 }
198                 e[n-1] = 0.0000;
199                 
200                 for(int l=0;l<n;l++){
201                         iter = 0;
202                         do {
203                                 for(myM=l;myM<n-1;myM++){
204                                         dd = fabs(d[myM]) + fabs(d[myM+1]);
205                                         if(fabs(e[myM])+dd == dd) break;
206                                 }
207                                 if(myM != l){
208                                         if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
209                                         g = (d[l+1]-d[l]) / (2.0 * e[l]);
210                                         r = pythag(g, 1.0);
211                                         g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
212                                         s = c = 1.0;
213                                         p = 0.0000;
214                                         for(i=myM-1;i>=l;i--){
215                                                 f = s * e[i];
216                                                 b = c * e[i];
217                                                 e[i+1] = (r=pythag(f,g));
218                                                 if(r==0.0){
219                                                         d[i+1] -= p;
220                                                         e[myM] = 0.0000;
221                                                         break;
222                                                 }
223                                                 s = f / r;
224                                                 c = g / r;
225                                                 g = d[i+1] - p;
226                                                 r = (d[i] - g) * s + 2.0 * c * b;
227                                                 d[i+1] = g + ( p = s * r);
228                                                 g = c * r - b;
229                                                 for(int k=0;k<n;k++){
230                                                         f = z[k][i+1];
231                                                         z[k][i+1] = s * z[k][i] + c * f;
232                                                         z[k][i] = c * z[k][i] - s * f;
233                                                 }
234                                         }
235                                         if(r == 0.00 && i >= l) continue;
236                                         d[l] -= p;
237                                         e[l] = g;
238                                         e[myM] = 0.0;
239                                 }
240                         } while (myM != l);
241                 }
242                 
243                 int k;
244                 for(int i=0;i<n;i++){
245                         p=d[k=i];
246                         for(int j=i;j<n;j++){
247                                 if(d[j] >= p){
248                                         p=d[k=j];
249                                 }
250                         }
251                         if(k!=i){
252                                 d[k]=d[i];
253                                 d[i]=p;
254                                 for(int j=0;j<n;j++){
255                                         p=z[j][i];
256                                         z[j][i] = z[j][k];
257                                         z[j][k] = p;
258                                 }
259                         }
260                 }
261                 
262                 return 0;
263         }
264         catch(exception& e) {
265                 m->errorOut(e, "LinearAlgebra", "qtli");
266                 exit(1);
267         }
268 }
269 /*********************************************************************************************************************************/
270 //groups by dimension
271 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
272         try {
273                 //make square matrix
274                 vector< vector<double> > dists; dists.resize(axes.size());
275                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
276                 
277                 if (dimensions == 1) { //one dimension calc = abs(x-y)
278                         
279                         for (int i = 0; i < dists.size(); i++) {
280                                 
281                                 if (m->control_pressed) { return dists; }
282                                 
283                                 for (int j = 0; j < i; j++) {
284                                         dists[i][j] = abs(axes[i][0] - axes[j][0]);
285                                         dists[j][i] = dists[i][j];
286                                 }
287                         }
288                         
289                 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
290                         
291                         for (int i = 0; i < dists.size(); i++) {
292                                 
293                                 if (m->control_pressed) { return dists; }
294                                 
295                                 for (int j = 0; j < i; j++) {
296                                         double sum = 0.0;
297                                         for (int k = 0; k < dimensions; k++) {
298                                                 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
299                                         }
300                                         
301                                         dists[i][j] = sqrt(sum);
302                                         dists[j][i] = dists[i][j];
303                                 }
304                         }
305                         
306                 }
307                 
308                 return dists;
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
312                 exit(1);
313         }
314 }
315 /*********************************************************************************************************************************/
316 //returns groups by dimensions from dimensions by groups
317 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
318         try {
319                 //make square matrix
320                 vector< vector<double> > dists; dists.resize(axes[0].size());
321                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
322                 
323                 if (axes.size() == 1) { //one dimension calc = abs(x-y)
324                         
325                         for (int i = 0; i < dists.size(); i++) {
326                                 
327                                 if (m->control_pressed) { return dists; }
328                                 
329                                 for (int j = 0; j < i; j++) {
330                                         dists[i][j] = abs(axes[0][i] - axes[0][j]);
331                                         dists[j][i] = dists[i][j];
332                                 }
333                         }
334                         
335                 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
336                         
337                         for (int i = 0; i < dists[0].size(); i++) {
338                                 
339                                 if (m->control_pressed) { return dists; }
340                                 
341                                 for (int j = 0; j < i; j++) {
342                                         double sum = 0.0;
343                                         for (int k = 0; k < axes.size(); k++) {
344                                                 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
345                                         }
346                                         
347                                         dists[i][j] = sqrt(sum);
348                                         dists[j][i] = dists[i][j];
349                                 }
350                         }
351                         
352                 }
353                 
354                 return dists;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
358                 exit(1);
359         }
360 }
361 /*********************************************************************************************************************************/
362 //assumes both matrices are square and the same size
363 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
364         try {
365                 
366                 //find average for - X
367                 int count = 0;
368                 float averageEuclid = 0.0; 
369                 for (int i = 0; i < euclidDists.size(); i++) {
370                         for (int j = 0; j < i; j++) {
371                                 averageEuclid += euclidDists[i][j];  
372                                 count++;
373                         }
374                 }
375                 averageEuclid = averageEuclid / (float) count;   
376                         
377                 //find average for - Y
378                 count = 0;
379                 float averageUser = 0.0; 
380                 for (int i = 0; i < userDists.size(); i++) {
381                         for (int j = 0; j < i; j++) {
382                                 averageUser += userDists[i][j]; 
383                                 count++;
384                         }
385                 }
386                 averageUser = averageUser / (float) count;  
387
388                 double numerator = 0.0;
389                 double denomTerm1 = 0.0;
390                 double denomTerm2 = 0.0;
391                 
392                 for (int i = 0; i < euclidDists.size(); i++) {
393                         
394                         for (int k = 0; k < i; k++) { //just lt dists
395                                 
396                                 float Yi = userDists[i][k];
397                                 float Xi = euclidDists[i][k];
398                                 
399                                 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
400                                 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
401                                 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
402                         }
403                 }
404                 
405                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
406                 double r = numerator / denom;
407                 
408                 //divide by zero error
409                 if (isnan(r) || isinf(r)) { r = 0.0; }
410                 
411                 return r;
412                 
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "LinearAlgebra", "calcPearson");
416                 exit(1);
417         }
418 }
419 /*********************************************************************************************************************************/
420 //assumes both matrices are square and the same size
421 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
422         try {
423                 double r; 
424                 
425                 //format data
426                 map<float, int> tableX; 
427                 map<float, int>::iterator itTable;
428                 vector<spearmanRank> scores; 
429                 
430                 for (int i = 0; i < euclidDists.size(); i++) {
431                         for (int j = 0; j < i; j++) {
432                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
433                                 scores.push_back(member);  
434                                 
435                                 //count number of repeats
436                                 itTable = tableX.find(euclidDists[i][j]);
437                                 if (itTable == tableX.end()) { 
438                                         tableX[euclidDists[i][j]] = 1;
439                                 }else {
440                                         tableX[euclidDists[i][j]]++;
441                                 }
442                         }
443                 }
444                 
445                 //sort scores
446                 sort(scores.begin(), scores.end(), compareSpearman); 
447
448                 //calc LX
449                 double Lx = 0.0; 
450                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
451                         double tx = (double) itTable->second;
452                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
453                 }
454                 
455                 //find ranks of xi
456                 map<string, float> rankEuclid;
457                 vector<spearmanRank> ties;
458                 int rankTotal = 0;
459                 for (int j = 0; j < scores.size(); j++) {
460                         rankTotal += (j+1);
461                         ties.push_back(scores[j]);
462                         
463                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
464                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
465                                         
466                                         for (int k = 0; k < ties.size(); k++) {
467                                                 float thisrank = rankTotal / (float) ties.size();
468                                                 rankEuclid[ties[k].name] = thisrank;
469                                         }
470                                         ties.clear();
471                                         rankTotal = 0;
472                                 }
473                         }else { // you are the last one
474                                 
475                                 for (int k = 0; k < ties.size(); k++) {
476                                         float thisrank = rankTotal / (float) ties.size();
477                                         rankEuclid[ties[k].name] = thisrank;
478                                 }
479                         }
480                 }
481                 
482                 
483                 //format data
484                 map<float, int> tableY; 
485                 scores.clear(); 
486                 
487                 for (int i = 0; i < userDists.size(); i++) {
488                         for (int j = 0; j < i; j++) {
489                                 spearmanRank member(toString(scores.size()), userDists[i][j]);
490                                 scores.push_back(member);  
491                                 
492                                 //count number of repeats
493                                 itTable = tableY.find(userDists[i][j]);
494                                 if (itTable == tableY.end()) { 
495                                         tableY[userDists[i][j]] = 1;
496                                 }else {
497                                         tableY[userDists[i][j]]++;
498                                 }
499                         }
500                 }
501                 
502                 //sort scores
503                 sort(scores.begin(), scores.end(), compareSpearman); 
504                 
505                 //calc LX
506                 double Ly = 0.0; 
507                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
508                         double ty = (double) itTable->second;
509                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
510                 }
511                 
512                 //find ranks of yi
513                 map<string, float> rankUser;
514                 ties.clear();
515                 rankTotal = 0;
516                 for (int j = 0; j < scores.size(); j++) {
517                         rankTotal += (j+1);
518                         ties.push_back(scores[j]);
519                         
520                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
521                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
522                                         
523                                         for (int k = 0; k < ties.size(); k++) {
524                                                 float thisrank = rankTotal / (float) ties.size();
525                                                 rankUser[ties[k].name] = thisrank;
526                                         }
527                                         ties.clear();
528                                         rankTotal = 0;
529                                 }
530                         }else { // you are the last one
531                                 
532                                 for (int k = 0; k < ties.size(); k++) {
533                                         float thisrank = rankTotal / (float) ties.size();
534                                         rankUser[ties[k].name] = thisrank;
535                                 }
536                         }
537                 }
538                 
539                         
540                 double di = 0.0;        
541                 int count = 0;
542                 for (int i = 0; i < userDists.size(); i++) {
543                         for (int j = 0; j < i; j++) {
544                         
545                                 float xi = rankEuclid[toString(count)];
546                                 float yi = rankUser[toString(count)];
547                         
548                                 di += ((xi - yi) * (xi - yi));
549                                 
550                                 count++;
551                         }
552                 }
553                 
554                 double n = (double) count;
555                 
556                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
557                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
558                 
559                 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
560                 
561                 //divide by zero error
562                 if (isnan(r) || isinf(r)) { r = 0.0; }
563         
564                 return r;
565                 
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
569                 exit(1);
570         }
571 }
572
573 /*********************************************************************************************************************************/
574 //assumes both matrices are square and the same size
575 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
576         try {
577                 double r;
578                 
579                 //format data
580                 vector<spearmanRank> scores; 
581                 for (int i = 0; i < euclidDists.size(); i++) {
582                         for (int j = 0; j < i; j++) {
583                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
584                                 scores.push_back(member);
585                         }
586                 }
587                         
588                 //sort scores
589                 sort(scores.begin(), scores.end(), compareSpearman);    
590                 
591                 //find ranks of xi
592                 map<string, float> rankEuclid;
593                 vector<spearmanRank> ties;
594                 int rankTotal = 0;
595                 for (int j = 0; j < scores.size(); j++) {
596                         rankTotal += (j+1);
597                         ties.push_back(scores[j]);
598                         
599                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
600                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
601                                         
602                                         for (int k = 0; k < ties.size(); k++) {
603                                                 float thisrank = rankTotal / (float) ties.size();
604                                                 rankEuclid[ties[k].name] = thisrank;
605                                         }
606                                         ties.clear();
607                                         rankTotal = 0;
608                                 }
609                         }else { // you are the last one
610                                 
611                                 for (int k = 0; k < ties.size(); k++) {
612                                         float thisrank = rankTotal / (float) ties.size();
613                                         rankEuclid[ties[k].name] = thisrank;
614                                 }
615                         }
616                 }
617                 
618                 vector<spearmanRank> scoresUser; 
619                 for (int i = 0; i < userDists.size(); i++) {
620                         for (int j = 0; j < i; j++) {
621                                 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
622                                 scoresUser.push_back(member);  
623                         }
624                 }
625                 
626                 //sort scores
627                 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);    
628                 
629                 //find ranks of yi
630                 map<string, float> rankUser;
631                 ties.clear();
632                 rankTotal = 0;
633                 for (int j = 0; j < scoresUser.size(); j++) {
634                         rankTotal += (j+1);
635                         ties.push_back(scoresUser[j]);
636                         
637                         if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
638                                 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
639                                         
640                                         for (int k = 0; k < ties.size(); k++) {
641                                                 float thisrank = rankTotal / (float) ties.size();
642                                                 rankUser[ties[k].name] = thisrank;
643                                         }
644                                         ties.clear();
645                                         rankTotal = 0;
646                                 }
647                         }else { // you are the last one
648                                 
649                                 for (int k = 0; k < ties.size(); k++) {
650                                         float thisrank = rankTotal / (float) ties.size();
651                                         rankUser[ties[k].name] = thisrank;
652                                 }
653                         }
654                 }
655                 
656                 int numCoor = 0;
657                 int numDisCoor = 0;
658                 
659                 //order user ranks
660                 vector<spearmanRank> user; 
661                 for (int l = 0; l < scores.size(); l++) {   
662                         spearmanRank member(scores[l].name, rankUser[scores[l].name]);
663                         user.push_back(member);
664                 }
665                                 
666                 int count = 0;
667                 for (int l = 0; l < scores.size(); l++) {
668                                         
669                         int numWithHigherRank = 0;
670                         int numWithLowerRank = 0;
671                         float thisrank = user[l].score;
672                                         
673                         for (int u = l+1; u < scores.size(); u++) {
674                                 if (user[u].score > thisrank) { numWithHigherRank++; }
675                                 else if (user[u].score < thisrank) { numWithLowerRank++; }
676                                 count++;
677                         }
678                                         
679                         numCoor += numWithHigherRank;
680                         numDisCoor += numWithLowerRank;
681                 }
682                                 
683                 r = (numCoor - numDisCoor) / (float) count;
684                 
685                 //divide by zero error
686                 if (isnan(r) || isinf(r)) { r = 0.0; }
687                 
688                 return r;
689                 
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "LinearAlgebra", "calcKendall");
693                 exit(1);
694         }
695 }
696 /*********************************************************************************************************************************/
697 double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double& sig){
698         try {
699                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
700                 
701                 //format data
702                 vector<spearmanRank> xscores; 
703                 for (int i = 0; i < x.size(); i++) {
704                         spearmanRank member(toString(i), x[i]);
705                         xscores.push_back(member);  
706                 }
707                 
708                 //sort xscores
709                 sort(xscores.begin(), xscores.end(), compareSpearman);
710                 
711                 //convert scores to ranks of x
712                 vector<spearmanRank*> ties;
713                 int rankTotal = 0;
714                 for (int j = 0; j < xscores.size(); j++) {
715                         rankTotal += (j+1);
716                         ties.push_back(&(xscores[j]));
717                                 
718                         if (j != xscores.size()-1) { // you are not the last so you can look ahead
719                                 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
720                                         for (int k = 0; k < ties.size(); k++) {
721                                                 float thisrank = rankTotal / (float) ties.size();
722                                                 (*ties[k]).score = thisrank;
723                                         }
724                                         ties.clear();
725                                         rankTotal = 0;
726                                 }
727                         }else { // you are the last one
728                                 for (int k = 0; k < ties.size(); k++) {
729                                         float thisrank = rankTotal / (float) ties.size();
730                                         (*ties[k]).score = thisrank;
731                                 }
732                         }
733                 }
734                 
735                         
736                 //format data
737                 vector<spearmanRank> yscores;
738                 for (int j = 0; j < y.size(); j++) {
739                         spearmanRank member(toString(j), y[j]);
740                         yscores.push_back(member);
741                 }
742                 
743                 //sort yscores
744                 sort(yscores.begin(), yscores.end(), compareSpearman);
745                 
746                 //convert to ranks
747                 map<string, float> rank;
748                 vector<spearmanRank> yties;
749                 rankTotal = 0;
750                 for (int j = 0; j < yscores.size(); j++) {
751                         rankTotal += (j+1);
752                         yties.push_back(yscores[j]);
753                                 
754                         if (j != yscores.size()-1) { // you are not the last so you can look ahead
755                                 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
756                                         for (int k = 0; k < yties.size(); k++) {
757                                                 float thisrank = rankTotal / (float) yties.size();
758                                                 rank[yties[k].name] = thisrank;
759                                         }
760                                         yties.clear();
761                                         rankTotal = 0;
762                                 }
763                         }else { // you are the last one
764                                 for (int k = 0; k < yties.size(); k++) {
765                                         float thisrank = rankTotal / (float) yties.size();
766                                         rank[yties[k].name] = thisrank;
767                                 }
768                         }
769                 }
770                         
771                         
772                 int numCoor = 0;
773                 int numDisCoor = 0;
774                 
775                 //associate x and y
776                 vector<spearmanRank> otus; 
777                 for (int l = 0; l < xscores.size(); l++) {   
778                         spearmanRank member(xscores[l].name, rank[xscores[l].name]);
779                         otus.push_back(member);
780                 }
781                                 
782                 int count = 0;
783                 for (int l = 0; l < xscores.size(); l++) {
784                                         
785                         int numWithHigherRank = 0;
786                         int numWithLowerRank = 0;
787                         float thisrank = otus[l].score;
788                                         
789                         for (int u = l+1; u < xscores.size(); u++) {
790                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
791                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
792                                 count++;
793                         }
794                                         
795                         numCoor += numWithHigherRank;
796                         numDisCoor += numWithLowerRank;
797                 }
798                                 
799                 double p = (numCoor - numDisCoor) / (float) count;
800                 
801                 //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
802                 double numer = 3.0 * (numCoor - numDisCoor);
803         int n = xscores.size();
804         double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
805         denom = sqrt(denom);
806         sig = numer / denom;
807                 
808                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
809                 
810                 return p;
811         }
812         catch(exception& e) {
813                 m->errorOut(e, "LinearAlgebra", "calcKendall");
814                 exit(1);
815         }
816 }
817 /*********************************************************************************************************************************/
818 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
819         try {
820                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
821                 
822                 //format data
823                 map<float, int> tableX; 
824                 map<float, int>::iterator itTable;
825                 vector<spearmanRank> xscores; 
826                 
827                 for (int i = 0; i < x.size(); i++) {
828                         spearmanRank member(toString(i), x[i]);
829                         xscores.push_back(member);  
830                                 
831                         //count number of repeats
832                         itTable = tableX.find(x[i]);
833                         if (itTable == tableX.end()) { 
834                                 tableX[x[i]] = 1;
835                         }else {
836                                 tableX[x[i]]++;
837                         }
838                 }
839                 
840                 
841                 //calc LX
842                 double Lx = 0.0;
843                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
844                         double tx = (double) itTable->second;
845                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
846                 }
847                 
848                 
849                 //sort x
850                 sort(xscores.begin(), xscores.end(), compareSpearman);
851                 
852                 //convert scores to ranks of x
853                 //convert to ranks
854                 map<string, float> rankx;
855                 vector<spearmanRank> xties;
856                 int rankTotal = 0;
857                 for (int j = 0; j < xscores.size(); j++) {
858                         rankTotal += (j+1);
859                         xties.push_back(xscores[j]);
860                         
861                         if (j != xscores.size()-1) { // you are not the last so you can look ahead
862                                 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
863                                         for (int k = 0; k < xties.size(); k++) {
864                                                 float thisrank = rankTotal / (float) xties.size();
865                                                 rankx[xties[k].name] = thisrank;
866                                         }
867                                         xties.clear();
868                                         rankTotal = 0;
869                                 }
870                         }else { // you are the last one
871                                 for (int k = 0; k < xties.size(); k++) {
872                                         float thisrank = rankTotal / (float) xties.size();
873                                         rankx[xties[k].name] = thisrank;
874                                 }
875                         }
876                 }               
877                         
878                 //format x
879                 vector<spearmanRank> yscores;
880                 map<float, int> tableY;
881                 for (int j = 0; j < y.size(); j++) {
882                         spearmanRank member(toString(j), y[j]);
883                         yscores.push_back(member);
884                                 
885                         itTable = tableY.find(member.score);
886                         if (itTable == tableY.end()) { 
887                                 tableY[member.score] = 1;
888                         }else {
889                                 tableY[member.score]++;
890                         }
891                                 
892                 }
893                         
894                 //calc Ly
895                 double Ly = 0.0;
896                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
897                         double ty = (double) itTable->second;
898                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
899                 }
900                         
901                 sort(yscores.begin(), yscores.end(), compareSpearman);
902                         
903                 //convert to ranks
904                 map<string, float> rank;
905                 vector<spearmanRank> yties;
906                 rankTotal = 0;
907                 for (int j = 0; j < yscores.size(); j++) {
908                         rankTotal += (j+1);
909                         yties.push_back(yscores[j]);
910                         
911                         if (j != yscores.size()-1) { // you are not the last so you can look ahead
912                                 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
913                                         for (int k = 0; k < yties.size(); k++) {
914                                                 float thisrank = rankTotal / (float) yties.size();
915                                                 rank[yties[k].name] = thisrank;
916                                         }
917                                         yties.clear();
918                                         rankTotal = 0;
919                                 }
920                         }else { // you are the last one
921                                 for (int k = 0; k < yties.size(); k++) {
922                                         float thisrank = rankTotal / (float) yties.size();
923                                         rank[yties[k].name] = thisrank;
924                                 }
925                         }
926                 }
927                 
928                 double di = 0.0;
929                 for (int k = 0; k < x.size(); k++) {
930                                         
931                         float xi = rankx[toString(k)];
932                         float yi = rank[toString(k)];
933                                         
934                         di += ((xi - yi) * (xi - yi));
935                 }
936                                 
937                 double p = 0.0;
938                                 
939                 double n = (double) x.size();
940                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
941                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
942                                 
943                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
944                 
945                 //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
946                 double temp = (x.size()-2) / (double) (1- (p*p));
947                 temp = sqrt(temp);
948                 sig = p*temp;
949                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
950                                 
951                 return p;
952         }
953         catch(exception& e) {
954                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
955                 exit(1);
956         }
957 }               
958 /*********************************************************************************************************************************/
959 double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double& sig){
960         try {
961                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
962                 
963                 //find average X
964                 float averageX = 0.0; 
965                 for (int i = 0; i < x.size(); i++) { averageX += x[i];  }
966                 averageX = averageX / (float) x.size(); 
967                 
968                 //find average Y
969                 float sumY = 0.0;
970                 for (int j = 0; j < y.size(); j++) { sumY += y[j]; }
971                 float Ybar = sumY / (float) y.size();
972                         
973                 double r = 0.0;
974                 double numerator = 0.0;
975                 double denomTerm1 = 0.0;
976                 double denomTerm2 = 0.0;
977                                 
978                 for (int j = 0; j < x.size(); j++) {
979                         float Yi = y[j];
980                         float Xi = x[j];
981                                         
982                         numerator += ((Xi - averageX) * (Yi - Ybar));
983                         denomTerm1 += ((Xi - averageX) * (Xi - averageX));
984                         denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
985                 }
986                                 
987                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
988                                 
989                 r = numerator / denom;
990                 
991                 //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
992                 double temp =  (1- (r*r)) / (double) (x.size()-2);
993                 temp = sqrt(temp);
994                 sig = r / temp;
995                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
996                 
997                 return r;
998         }
999         catch(exception& e) {
1000                 m->errorOut(e, "LinearAlgebra", "calcPearson");
1001                 exit(1);
1002         }
1003 }                       
1004 /*********************************************************************************************************************************/
1005
1006 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
1007
1008         int numSamples = relAbundData.size();
1009         int numOTUs = relAbundData[0].size();
1010         
1011         vector<vector<double> > dMatrix(numSamples);
1012         for(int i=0;i<numSamples;i++){
1013                 dMatrix[i].resize(numSamples);
1014         }
1015         
1016         for(int i=0;i<numSamples;i++){
1017                 for(int j=0;j<numSamples;j++){
1018                         
1019                         double d = 0;
1020                         for(int k=0;k<numOTUs;k++){
1021                                 d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
1022                         }
1023                         dMatrix[i][j] = pow(d, 0.50000);
1024                         dMatrix[j][i] = dMatrix[i][j];
1025                         
1026                 }
1027         }
1028         return dMatrix;
1029         
1030 }
1031
1032 /*********************************************************************************************************************************/