]> git.donarmstrong.com Git - mothur.git/blob - linearalgebra.cpp
changed sffinfo flow default to true. fixed bug in trim.seqs and filter.seqs related...
[mothur.git] / linearalgebra.cpp
1 /*
2  *  linearalgebra.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "linearalgebra.h"
11
12 // This class references functions used from "Numerical Recipes in C++" //
13 /*********************************************************************************************************************************/
14
15 inline double SIGN(const double a, const double b)
16 {
17     return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
18 }
19 /*********************************************************************************************************************************/
20
21 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
22         try {
23                 vector<vector<double> > product;
24                 
25                 int first_rows = first.size();
26                 int first_cols = first[0].size();
27                 int second_cols = second[0].size();
28                 
29                 product.resize(first_rows);
30                 for(int i=0;i<first_rows;i++){
31                         product[i].resize(second_cols);
32                 }
33                 
34                 for(int i=0;i<first_rows;i++){
35                         for(int j=0;j<second_cols;j++){
36                                 
37                                 if (m->control_pressed) { return product; }
38                                         
39                                 product[i][j] = 0.0;
40                                 for(int k=0;k<first_cols;k++){
41                                         product[i][j] += first[i][k] * second[k][j];
42                                 }
43                         }
44                 }
45                 
46                 return product;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "LinearAlgebra", "matrix_mult");
50                 exit(1);
51         }
52         
53 }
54
55 /*********************************************************************************************************************************/
56
57 void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
58         try {
59                 int rank = D.size();
60                 
61                 vector<vector<double> > A(rank);
62                 vector<vector<double> > C(rank);
63                 for(int i=0;i<rank;i++){
64                         A[i].resize(rank);
65                         C[i].resize(rank);
66                 }
67                 
68                 double scale = -1.0000 / (double) rank;
69                 
70                 for(int i=0;i<rank;i++){
71                         A[i][i] = 0.0000;
72                         C[i][i] = 1.0000 + scale;
73                         for(int j=i+1;j<rank;j++){
74                                 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
75                                 C[i][j] = C[j][i] = scale;
76                         }
77                 }
78                 
79                 A = matrix_mult(C,A);
80                 G = matrix_mult(A,C);
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "LinearAlgebra", "recenter");
84                 exit(1);
85         }
86         
87 }
88 /*********************************************************************************************************************************/
89
90 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
91
92 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
93         try {
94                 double scale, hh, h, g, f;
95                 
96                 int n = a.size();
97                 
98                 d.resize(n);
99                 e.resize(n);
100                 
101                 for(int i=n-1;i>0;i--){
102                         int l=i-1;
103                         h = scale = 0.0000;
104                         if(l>0){
105                                 for(int k=0;k<l+1;k++){
106                                         scale += fabs(a[i][k]);
107                                 }
108                                 if(scale == 0.0){
109                                         e[i] = a[i][l];
110                                 }
111                                 else{
112                                         for(int k=0;k<l+1;k++){
113                                                 a[i][k] /= scale;
114                                                 h += a[i][k] * a[i][k];
115                                         }
116                                         f = a[i][l];
117                                         g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
118                                         e[i] = scale * g;
119                                         h -= f * g;
120                                         a[i][l] = f - g;
121                                         f = 0.0;
122                                         for(int j=0;j<l+1;j++){
123                                                 a[j][i] = a[i][j] / h;
124                                                 g = 0.0;
125                                                 for(int k=0;k<j+1;k++){
126                                                         g += a[j][k] * a[i][k];
127                                                 }
128                                                 for(int k=j+1;k<l+1;k++){
129                                                         g += a[k][j] * a[i][k];
130                                                 }
131                                                 e[j] = g / h;
132                                                 f += e[j] * a[i][j];
133                                         }
134                                         hh = f / (h + h);
135                                         for(int j=0;j<l+1;j++){
136                                                 f = a[i][j];
137                                                 e[j] = g = e[j] - hh * f;
138                                                 for(int k=0;k<j+1;k++){
139                                                         a[j][k] -= (f * e[k] + g * a[i][k]);
140                                                 }
141                                         }
142                                 }
143                         }
144                         else{
145                                 e[i] = a[i][l];
146                         }
147                         
148                         d[i] = h;
149                 }
150                 
151                 d[0] = 0.0000;
152                 e[0] = 0.0000;
153                 
154                 for(int i=0;i<n;i++){
155                         int l = i;
156                         if(d[i] != 0.0){
157                                 for(int j=0;j<l;j++){
158                                         g = 0.0000;
159                                         for(int k=0;k<l;k++){
160                                                 g += a[i][k] * a[k][j];
161                                         }
162                                         for(int k=0;k<l;k++){
163                                                 a[k][j] -= g * a[k][i];
164                                         }
165                                 }
166                         }
167                         d[i] = a[i][i];
168                         a[i][i] = 1.0000;
169                         for(int j=0;j<l;j++){
170                                 a[j][i] = a[i][j] = 0.0;
171                         }
172                 }
173                 
174                 return 0;
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "LinearAlgebra", "tred2");
178                 exit(1);
179         }
180         
181 }
182 /*********************************************************************************************************************************/
183
184 double LinearAlgebra::pythag(double a, double b)        {       return(pow(a*a+b*b,0.5));       }
185
186 /*********************************************************************************************************************************/
187
188 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
189
190 int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
191         try {
192                 int myM, i, iter;
193                 double s, r, p, g, f, dd, c, b;
194                 
195                 int n = d.size();
196                 for(int i=1;i<=n;i++){
197                         e[i-1] = e[i];
198                 }
199                 e[n-1] = 0.0000;
200                 
201                 for(int l=0;l<n;l++){
202                         iter = 0;
203                         do {
204                                 for(myM=l;myM<n-1;myM++){
205                                         dd = fabs(d[myM]) + fabs(d[myM+1]);
206                                         if(fabs(e[myM])+dd == dd) break;
207                                 }
208                                 if(myM != l){
209                                         if(iter++ == 3000) cerr << "Too many iterations in tqli\n";
210                                         g = (d[l+1]-d[l]) / (2.0 * e[l]);
211                                         r = pythag(g, 1.0);
212                                         g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
213                                         s = c = 1.0;
214                                         p = 0.0000;
215                                         for(i=myM-1;i>=l;i--){
216                                                 f = s * e[i];
217                                                 b = c * e[i];
218                                                 e[i+1] = (r=pythag(f,g));
219                                                 if(r==0.0){
220                                                         d[i+1] -= p;
221                                                         e[myM] = 0.0000;
222                                                         break;
223                                                 }
224                                                 s = f / r;
225                                                 c = g / r;
226                                                 g = d[i+1] - p;
227                                                 r = (d[i] - g) * s + 2.0 * c * b;
228                                                 d[i+1] = g + ( p = s * r);
229                                                 g = c * r - b;
230                                                 for(int k=0;k<n;k++){
231                                                         f = z[k][i+1];
232                                                         z[k][i+1] = s * z[k][i] + c * f;
233                                                         z[k][i] = c * z[k][i] - s * f;
234                                                 }
235                                         }
236                                         if(r == 0.00 && i >= l) continue;
237                                         d[l] -= p;
238                                         e[l] = g;
239                                         e[myM] = 0.0;
240                                 }
241                         } while (myM != l);
242                 }
243                 
244                 int k;
245                 for(int i=0;i<n;i++){
246                         p=d[k=i];
247                         for(int j=i;j<n;j++){
248                                 if(d[j] >= p){
249                                         p=d[k=j];
250                                 }
251                         }
252                         if(k!=i){
253                                 d[k]=d[i];
254                                 d[i]=p;
255                                 for(int j=0;j<n;j++){
256                                         p=z[j][i];
257                                         z[j][i] = z[j][k];
258                                         z[j][k] = p;
259                                 }
260                         }
261                 }
262                 
263                 return 0;
264         }
265         catch(exception& e) {
266                 m->errorOut(e, "LinearAlgebra", "qtli");
267                 exit(1);
268         }
269 }
270 /*********************************************************************************************************************************/
271 //groups by dimension
272 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
273         try {
274                 //make square matrix
275                 vector< vector<double> > dists; dists.resize(axes.size());
276                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
277                 
278                 if (dimensions == 1) { //one dimension calc = abs(x-y)
279                         
280                         for (int i = 0; i < dists.size(); i++) {
281                                 
282                                 if (m->control_pressed) { return dists; }
283                                 
284                                 for (int j = 0; j < i; j++) {
285                                         dists[i][j] = abs(axes[i][0] - axes[j][0]);
286                                         dists[j][i] = dists[i][j];
287                                 }
288                         }
289                         
290                 }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
291                         
292                         for (int i = 0; i < dists.size(); i++) {
293                                 
294                                 if (m->control_pressed) { return dists; }
295                                 
296                                 for (int j = 0; j < i; j++) {
297                                         double sum = 0.0;
298                                         for (int k = 0; k < dimensions; k++) {
299                                                 sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
300                                         }
301                                         
302                                         dists[i][j] = sqrt(sum);
303                                         dists[j][i] = dists[i][j];
304                                 }
305                         }
306                         
307                 }
308                 
309                 return dists;
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
313                 exit(1);
314         }
315 }
316 /*********************************************************************************************************************************/
317 //returns groups by dimensions from dimensions by groups
318 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
319         try {
320                 //make square matrix
321                 vector< vector<double> > dists; dists.resize(axes[0].size());
322                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
323                 
324                 if (axes.size() == 1) { //one dimension calc = abs(x-y)
325                         
326                         for (int i = 0; i < dists.size(); i++) {
327                                 
328                                 if (m->control_pressed) { return dists; }
329                                 
330                                 for (int j = 0; j < i; j++) {
331                                         dists[i][j] = abs(axes[0][i] - axes[0][j]);
332                                         dists[j][i] = dists[i][j];
333                                 }
334                         }
335                         
336                 }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
337                         
338                         for (int i = 0; i < dists[0].size(); i++) {
339                                 
340                                 if (m->control_pressed) { return dists; }
341                                 
342                                 for (int j = 0; j < i; j++) {
343                                         double sum = 0.0;
344                                         for (int k = 0; k < axes.size(); k++) {
345                                                 sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
346                                         }
347                                         
348                                         dists[i][j] = sqrt(sum);
349                                         dists[j][i] = dists[i][j];
350                                 }
351                         }
352                         
353                 }
354                 
355                 return dists;
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
359                 exit(1);
360         }
361 }
362 /*********************************************************************************************************************************/
363 //assumes both matrices are square and the same size
364 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
365         try {
366                 
367                 //find average for - X
368                 int count = 0;
369                 float averageEuclid = 0.0; 
370                 for (int i = 0; i < euclidDists.size(); i++) {
371                         for (int j = 0; j < i; j++) {
372                                 averageEuclid += euclidDists[i][j];  
373                                 count++;
374                         }
375                 }
376                 averageEuclid = averageEuclid / (float) count;   
377                         
378                 //find average for - Y
379                 count = 0;
380                 float averageUser = 0.0; 
381                 for (int i = 0; i < userDists.size(); i++) {
382                         for (int j = 0; j < i; j++) {
383                                 averageUser += userDists[i][j]; 
384                                 count++;
385                         }
386                 }
387                 averageUser = averageUser / (float) count;  
388
389                 double numerator = 0.0;
390                 double denomTerm1 = 0.0;
391                 double denomTerm2 = 0.0;
392                 
393                 for (int i = 0; i < euclidDists.size(); i++) {
394                         
395                         for (int k = 0; k < i; k++) { //just lt dists
396                                 
397                                 float Yi = userDists[i][k];
398                                 float Xi = euclidDists[i][k];
399                                 
400                                 numerator += ((Xi - averageEuclid) * (Yi - averageUser));
401                                 denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
402                                 denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
403                         }
404                 }
405                 
406                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
407                 double r = numerator / denom;
408                 
409                 //divide by zero error
410                 if (isnan(r) || isinf(r)) { r = 0.0; }
411                 
412                 return r;
413                 
414         }
415         catch(exception& e) {
416                 m->errorOut(e, "LinearAlgebra", "calcPearson");
417                 exit(1);
418         }
419 }
420 /*********************************************************************************************************************************/
421 //assumes both matrices are square and the same size
422 double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
423         try {
424                 double r; 
425                 
426                 //format data
427                 map<float, int> tableX; 
428                 map<float, int>::iterator itTable;
429                 vector<spearmanRank> scores; 
430                 
431                 for (int i = 0; i < euclidDists.size(); i++) {
432                         for (int j = 0; j < i; j++) {
433                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
434                                 scores.push_back(member);  
435                                 
436                                 //count number of repeats
437                                 itTable = tableX.find(euclidDists[i][j]);
438                                 if (itTable == tableX.end()) { 
439                                         tableX[euclidDists[i][j]] = 1;
440                                 }else {
441                                         tableX[euclidDists[i][j]]++;
442                                 }
443                         }
444                 }
445                 
446                 //sort scores
447                 sort(scores.begin(), scores.end(), compareSpearman); 
448
449                 //calc LX
450                 double Lx = 0.0; 
451                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
452                         double tx = (double) itTable->second;
453                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
454                 }
455                 
456                 //find ranks of xi
457                 map<string, float> rankEuclid;
458                 vector<spearmanRank> ties;
459                 int rankTotal = 0;
460                 for (int j = 0; j < scores.size(); j++) {
461                         rankTotal += (j+1);
462                         ties.push_back(scores[j]);
463                         
464                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
465                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
466                                         
467                                         for (int k = 0; k < ties.size(); k++) {
468                                                 float thisrank = rankTotal / (float) ties.size();
469                                                 rankEuclid[ties[k].name] = thisrank;
470                                         }
471                                         ties.clear();
472                                         rankTotal = 0;
473                                 }
474                         }else { // you are the last one
475                                 
476                                 for (int k = 0; k < ties.size(); k++) {
477                                         float thisrank = rankTotal / (float) ties.size();
478                                         rankEuclid[ties[k].name] = thisrank;
479                                 }
480                         }
481                 }
482                 
483                 
484                 //format data
485                 map<float, int> tableY; 
486                 scores.clear(); 
487                 
488                 for (int i = 0; i < userDists.size(); i++) {
489                         for (int j = 0; j < i; j++) {
490                                 spearmanRank member(toString(scores.size()), userDists[i][j]);
491                                 scores.push_back(member);  
492                                 
493                                 //count number of repeats
494                                 itTable = tableY.find(userDists[i][j]);
495                                 if (itTable == tableY.end()) { 
496                                         tableY[userDists[i][j]] = 1;
497                                 }else {
498                                         tableY[userDists[i][j]]++;
499                                 }
500                         }
501                 }
502                 
503                 //sort scores
504                 sort(scores.begin(), scores.end(), compareSpearman); 
505                 
506                 //calc LX
507                 double Ly = 0.0; 
508                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
509                         double ty = (double) itTable->second;
510                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
511                 }
512                 
513                 //find ranks of yi
514                 map<string, float> rankUser;
515                 ties.clear();
516                 rankTotal = 0;
517                 for (int j = 0; j < scores.size(); j++) {
518                         rankTotal += (j+1);
519                         ties.push_back(scores[j]);
520                         
521                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
522                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
523                                         
524                                         for (int k = 0; k < ties.size(); k++) {
525                                                 float thisrank = rankTotal / (float) ties.size();
526                                                 rankUser[ties[k].name] = thisrank;
527                                         }
528                                         ties.clear();
529                                         rankTotal = 0;
530                                 }
531                         }else { // you are the last one
532                                 
533                                 for (int k = 0; k < ties.size(); k++) {
534                                         float thisrank = rankTotal / (float) ties.size();
535                                         rankUser[ties[k].name] = thisrank;
536                                 }
537                         }
538                 }
539                 
540                         
541                 double di = 0.0;        
542                 int count = 0;
543                 for (int i = 0; i < userDists.size(); i++) {
544                         for (int j = 0; j < i; j++) {
545                         
546                                 float xi = rankEuclid[toString(count)];
547                                 float yi = rankUser[toString(count)];
548                         
549                                 di += ((xi - yi) * (xi - yi));
550                                 
551                                 count++;
552                         }
553                 }
554                 
555                 double n = (double) count;
556                 
557                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
558                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
559                 
560                 r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
561                 
562                 //divide by zero error
563                 if (isnan(r) || isinf(r)) { r = 0.0; }
564         
565                 return r;
566                 
567         }
568         catch(exception& e) {
569                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
570                 exit(1);
571         }
572 }
573
574 /*********************************************************************************************************************************/
575 //assumes both matrices are square and the same size
576 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
577         try {
578                 double r;
579                 
580                 //format data
581                 vector<spearmanRank> scores; 
582                 for (int i = 0; i < euclidDists.size(); i++) {
583                         for (int j = 0; j < i; j++) {
584                                 spearmanRank member(toString(scores.size()), euclidDists[i][j]);
585                                 scores.push_back(member);
586                         }
587                 }
588                         
589                 //sort scores
590                 sort(scores.begin(), scores.end(), compareSpearman);    
591                 
592                 //find ranks of xi
593                 map<string, float> rankEuclid;
594                 vector<spearmanRank> ties;
595                 int rankTotal = 0;
596                 for (int j = 0; j < scores.size(); j++) {
597                         rankTotal += (j+1);
598                         ties.push_back(scores[j]);
599                         
600                         if (j != (scores.size()-1)) { // you are not the last so you can look ahead
601                                 if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
602                                         
603                                         for (int k = 0; k < ties.size(); k++) {
604                                                 float thisrank = rankTotal / (float) ties.size();
605                                                 rankEuclid[ties[k].name] = thisrank;
606                                         }
607                                         ties.clear();
608                                         rankTotal = 0;
609                                 }
610                         }else { // you are the last one
611                                 
612                                 for (int k = 0; k < ties.size(); k++) {
613                                         float thisrank = rankTotal / (float) ties.size();
614                                         rankEuclid[ties[k].name] = thisrank;
615                                 }
616                         }
617                 }
618                 
619                 vector<spearmanRank> scoresUser; 
620                 for (int i = 0; i < userDists.size(); i++) {
621                         for (int j = 0; j < i; j++) {
622                                 spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
623                                 scoresUser.push_back(member);  
624                         }
625                 }
626                 
627                 //sort scores
628                 sort(scoresUser.begin(), scoresUser.end(), compareSpearman);    
629                 
630                 //find ranks of yi
631                 map<string, float> rankUser;
632                 ties.clear();
633                 rankTotal = 0;
634                 for (int j = 0; j < scoresUser.size(); j++) {
635                         rankTotal += (j+1);
636                         ties.push_back(scoresUser[j]);
637                         
638                         if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
639                                 if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
640                                         
641                                         for (int k = 0; k < ties.size(); k++) {
642                                                 float thisrank = rankTotal / (float) ties.size();
643                                                 rankUser[ties[k].name] = thisrank;
644                                         }
645                                         ties.clear();
646                                         rankTotal = 0;
647                                 }
648                         }else { // you are the last one
649                                 
650                                 for (int k = 0; k < ties.size(); k++) {
651                                         float thisrank = rankTotal / (float) ties.size();
652                                         rankUser[ties[k].name] = thisrank;
653                                 }
654                         }
655                 }
656                 
657                 int numCoor = 0;
658                 int numDisCoor = 0;
659                 
660                 //order user ranks
661                 vector<spearmanRank> user; 
662                 for (int l = 0; l < scores.size(); l++) {   
663                         spearmanRank member(scores[l].name, rankUser[scores[l].name]);
664                         user.push_back(member);
665                 }
666                                 
667                 int count = 0;
668                 for (int l = 0; l < scores.size(); l++) {
669                                         
670                         int numWithHigherRank = 0;
671                         int numWithLowerRank = 0;
672                         float thisrank = user[l].score;
673                                         
674                         for (int u = l+1; u < scores.size(); u++) {
675                                 if (user[u].score > thisrank) { numWithHigherRank++; }
676                                 else if (user[u].score < thisrank) { numWithLowerRank++; }
677                                 count++;
678                         }
679                                         
680                         numCoor += numWithHigherRank;
681                         numDisCoor += numWithLowerRank;
682                 }
683                                 
684                 r = (numCoor - numDisCoor) / (float) count;
685                 
686                 //divide by zero error
687                 if (isnan(r) || isinf(r)) { r = 0.0; }
688                 
689                 return r;
690                 
691         }
692         catch(exception& e) {
693                 m->errorOut(e, "LinearAlgebra", "calcKendall");
694                 exit(1);
695         }
696 }
697 /*********************************************************************************************************************************/
698 double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double& sig){
699         try {
700                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
701                 
702                 //format data
703                 vector<spearmanRank> xscores; 
704                 for (int i = 0; i < x.size(); i++) {
705                         spearmanRank member(toString(i), x[i]);
706                         xscores.push_back(member);  
707                 }
708                 
709                 //sort xscores
710                 sort(xscores.begin(), xscores.end(), compareSpearman);
711                 
712                 //convert scores to ranks of x
713                 vector<spearmanRank*> ties;
714                 int rankTotal = 0;
715                 for (int j = 0; j < xscores.size(); j++) {
716                         rankTotal += (j+1);
717                         ties.push_back(&(xscores[j]));
718                                 
719                         if (j != xscores.size()-1) { // you are not the last so you can look ahead
720                                 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
721                                         for (int k = 0; k < ties.size(); k++) {
722                                                 float thisrank = rankTotal / (float) ties.size();
723                                                 (*ties[k]).score = thisrank;
724                                         }
725                                         ties.clear();
726                                         rankTotal = 0;
727                                 }
728                         }else { // you are the last one
729                                 for (int k = 0; k < ties.size(); k++) {
730                                         float thisrank = rankTotal / (float) ties.size();
731                                         (*ties[k]).score = thisrank;
732                                 }
733                         }
734                 }
735                 
736                         
737                 //format data
738                 vector<spearmanRank> yscores;
739                 for (int j = 0; j < y.size(); j++) {
740                         spearmanRank member(toString(j), y[j]);
741                         yscores.push_back(member);
742                 }
743                 
744                 //sort yscores
745                 sort(yscores.begin(), yscores.end(), compareSpearman);
746                 
747                 //convert to ranks
748                 map<string, float> rank;
749                 vector<spearmanRank> yties;
750                 rankTotal = 0;
751                 for (int j = 0; j < yscores.size(); j++) {
752                         rankTotal += (j+1);
753                         yties.push_back(yscores[j]);
754                                 
755                         if (j != yscores.size()-1) { // you are not the last so you can look ahead
756                                 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
757                                         for (int k = 0; k < yties.size(); k++) {
758                                                 float thisrank = rankTotal / (float) yties.size();
759                                                 rank[yties[k].name] = thisrank;
760                                         }
761                                         yties.clear();
762                                         rankTotal = 0;
763                                 }
764                         }else { // you are the last one
765                                 for (int k = 0; k < yties.size(); k++) {
766                                         float thisrank = rankTotal / (float) yties.size();
767                                         rank[yties[k].name] = thisrank;
768                                 }
769                         }
770                 }
771                         
772                         
773                 int numCoor = 0;
774                 int numDisCoor = 0;
775                 
776                 //associate x and y
777                 vector<spearmanRank> otus; 
778                 for (int l = 0; l < xscores.size(); l++) {   
779                         spearmanRank member(xscores[l].name, rank[xscores[l].name]);
780                         otus.push_back(member);
781                 }
782                                 
783                 int count = 0;
784                 for (int l = 0; l < xscores.size(); l++) {
785                                         
786                         int numWithHigherRank = 0;
787                         int numWithLowerRank = 0;
788                         float thisrank = otus[l].score;
789                                         
790                         for (int u = l+1; u < xscores.size(); u++) {
791                                 if (otus[u].score > thisrank) { numWithHigherRank++; }
792                                 else if (otus[u].score < thisrank) { numWithLowerRank++; }
793                                 count++;
794                         }
795                                         
796                         numCoor += numWithHigherRank;
797                         numDisCoor += numWithLowerRank;
798                 }
799                                 
800                 double p = (numCoor - numDisCoor) / (float) count;
801                 
802                 //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
803                 double numer = 3.0 * (numCoor - numDisCoor);
804         int n = xscores.size();
805         double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
806         denom = sqrt(denom);
807         sig = numer / denom;
808                 
809                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
810                 
811                 return p;
812         }
813         catch(exception& e) {
814                 m->errorOut(e, "LinearAlgebra", "calcKendall");
815                 exit(1);
816         }
817 }
818 /*********************************************************************************************************************************/
819 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
820         try {
821                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
822                 
823                 //format data
824                 map<float, int> tableX; 
825                 map<float, int>::iterator itTable;
826                 vector<spearmanRank> xscores; 
827                 
828                 for (int i = 0; i < x.size(); i++) {
829                         spearmanRank member(toString(i), x[i]);
830                         xscores.push_back(member);  
831                                 
832                         //count number of repeats
833                         itTable = tableX.find(x[i]);
834                         if (itTable == tableX.end()) { 
835                                 tableX[x[i]] = 1;
836                         }else {
837                                 tableX[x[i]]++;
838                         }
839                 }
840                 
841                 
842                 //calc LX
843                 double Lx = 0.0;
844                 for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
845                         double tx = (double) itTable->second;
846                         Lx += ((pow(tx, 3.0) - tx) / 12.0);
847                 }
848                 
849                 
850                 //sort x
851                 sort(xscores.begin(), xscores.end(), compareSpearman);
852                 
853                 //convert scores to ranks of x
854                 //convert to ranks
855                 map<string, float> rankx;
856                 vector<spearmanRank> xties;
857                 int rankTotal = 0;
858                 for (int j = 0; j < xscores.size(); j++) {
859                         rankTotal += (j+1);
860                         xties.push_back(xscores[j]);
861                         
862                         if (j != xscores.size()-1) { // you are not the last so you can look ahead
863                                 if (xscores[j].score != xscores[j+1].score) { // you are done with ties, rank them and continue
864                                         for (int k = 0; k < xties.size(); k++) {
865                                                 float thisrank = rankTotal / (float) xties.size();
866                                                 rankx[xties[k].name] = thisrank;
867                                         }
868                                         xties.clear();
869                                         rankTotal = 0;
870                                 }
871                         }else { // you are the last one
872                                 for (int k = 0; k < xties.size(); k++) {
873                                         float thisrank = rankTotal / (float) xties.size();
874                                         rankx[xties[k].name] = thisrank;
875                                 }
876                         }
877                 }               
878                         
879                 //format x
880                 vector<spearmanRank> yscores;
881                 map<float, int> tableY;
882                 for (int j = 0; j < y.size(); j++) {
883                         spearmanRank member(toString(j), y[j]);
884                         yscores.push_back(member);
885                                 
886                         itTable = tableY.find(member.score);
887                         if (itTable == tableY.end()) { 
888                                 tableY[member.score] = 1;
889                         }else {
890                                 tableY[member.score]++;
891                         }
892                                 
893                 }
894                         
895                 //calc Ly
896                 double Ly = 0.0;
897                 for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
898                         double ty = (double) itTable->second;
899                         Ly += ((pow(ty, 3.0) - ty) / 12.0);
900                 }
901                         
902                 sort(yscores.begin(), yscores.end(), compareSpearman);
903                         
904                 //convert to ranks
905                 map<string, float> rank;
906                 vector<spearmanRank> yties;
907                 rankTotal = 0;
908                 for (int j = 0; j < yscores.size(); j++) {
909                         rankTotal += (j+1);
910                         yties.push_back(yscores[j]);
911                         
912                         if (j != yscores.size()-1) { // you are not the last so you can look ahead
913                                 if (yscores[j].score != yscores[j+1].score) { // you are done with ties, rank them and continue
914                                         for (int k = 0; k < yties.size(); k++) {
915                                                 float thisrank = rankTotal / (float) yties.size();
916                                                 rank[yties[k].name] = thisrank;
917                                         }
918                                         yties.clear();
919                                         rankTotal = 0;
920                                 }
921                         }else { // you are the last one
922                                 for (int k = 0; k < yties.size(); k++) {
923                                         float thisrank = rankTotal / (float) yties.size();
924                                         rank[yties[k].name] = thisrank;
925                                 }
926                         }
927                 }
928                 
929                 double di = 0.0;
930                 for (int k = 0; k < x.size(); k++) {
931                                         
932                         float xi = rankx[toString(k)];
933                         float yi = rank[toString(k)];
934                                         
935                         di += ((xi - yi) * (xi - yi));
936                 }
937                                 
938                 double p = 0.0;
939                                 
940                 double n = (double) x.size();
941                 double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
942                 double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
943                                 
944                 p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
945                 
946                 //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
947                 double temp = (x.size()-2) / (double) (1- (p*p));
948                 temp = sqrt(temp);
949                 sig = p*temp;
950                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
951                                 
952                 return p;
953         }
954         catch(exception& e) {
955                 m->errorOut(e, "LinearAlgebra", "calcSpearman");
956                 exit(1);
957         }
958 }               
959 /*********************************************************************************************************************************/
960 double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double& sig){
961         try {
962                 if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
963                 
964                 //find average X
965                 float averageX = 0.0; 
966                 for (int i = 0; i < x.size(); i++) { averageX += x[i];  }
967                 averageX = averageX / (float) x.size(); 
968                 
969                 //find average Y
970                 float sumY = 0.0;
971                 for (int j = 0; j < y.size(); j++) { sumY += y[j]; }
972                 float Ybar = sumY / (float) y.size();
973                         
974                 double r = 0.0;
975                 double numerator = 0.0;
976                 double denomTerm1 = 0.0;
977                 double denomTerm2 = 0.0;
978                                 
979                 for (int j = 0; j < x.size(); j++) {
980                         float Yi = y[j];
981                         float Xi = x[j];
982                                         
983                         numerator += ((Xi - averageX) * (Yi - Ybar));
984                         denomTerm1 += ((Xi - averageX) * (Xi - averageX));
985                         denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
986                 }
987                                 
988                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
989                                 
990                 r = numerator / denom;
991                 
992                 //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
993                 double temp =  (1- (r*r)) / (double) (x.size()-2);
994                 temp = sqrt(temp);
995                 sig = r / temp;
996                 if (isnan(sig) || isinf(sig)) { sig = 0.0; }
997                 
998                 return r;
999         }
1000         catch(exception& e) {
1001                 m->errorOut(e, "LinearAlgebra", "calcPearson");
1002                 exit(1);
1003         }
1004 }                       
1005 /*********************************************************************************************************************************/
1006
1007 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
1008
1009         int numSamples = relAbundData.size();
1010         int numOTUs = relAbundData[0].size();
1011         
1012         vector<vector<double> > dMatrix(numSamples);
1013         for(int i=0;i<numSamples;i++){
1014                 dMatrix[i].resize(numSamples);
1015         }
1016         
1017         for(int i=0;i<numSamples;i++){
1018                 for(int j=0;j<numSamples;j++){
1019                         
1020                         double d = 0;
1021                         for(int k=0;k<numOTUs;k++){
1022                                 d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
1023                         }
1024                         dMatrix[i][j] = pow(d, 0.50000);
1025                         dMatrix[j][i] = dMatrix[i][j];
1026                         
1027                 }
1028         }
1029         return dMatrix;
1030         
1031 }
1032
1033 /*********************************************************************************************************************************/