]> git.donarmstrong.com Git - mothur.git/blobdiff - linearalgebra.cpp
pat's fixes to pca command
[mothur.git] / linearalgebra.cpp
index 192701af4738ffebc9190a54e5a4da1095679bfc..e19861912d9c7fc78f9b2cbbb87ecd76e9388ba9 100644 (file)
@@ -53,6 +53,39 @@ vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first
 
 /*********************************************************************************************************************************/
 
+void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
+       try {
+               int rank = D.size();
+               
+               vector<vector<double> > A(rank);
+               vector<vector<double> > C(rank);
+               for(int i=0;i<rank;i++){
+                       A[i].resize(rank);
+                       C[i].resize(rank);
+               }
+               
+               double scale = -1.0000 / (double) rank;
+               
+               for(int i=0;i<rank;i++){
+                       A[i][i] = 0.0000;
+                       C[i][i] = 1.0000 + scale;
+                       for(int j=i+1;j<rank;j++){
+                               A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
+                               C[i][j] = C[j][i] = scale;
+                       }
+               }
+               
+               A = matrix_mult(C,A);
+               G = matrix_mult(A,C);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "recenter");
+               exit(1);
+       }
+       
+}
+/*********************************************************************************************************************************/
+
 //  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
 
 int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
@@ -234,6 +267,7 @@ int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<doub
        }
 }
 /*********************************************************************************************************************************/
+//groups by dimension
 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
        try {
                //make square matrix
@@ -252,38 +286,70 @@ vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vecto
                                }
                        }
                        
-               }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
+               }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
                        
                        for (int i = 0; i < dists.size(); i++) {
                                
                                if (m->control_pressed) { return dists; }
                                
                                for (int j = 0; j < i; j++) {
-                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
-                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
+                                       double sum = 0.0;
+                                       for (int k = 0; k < dimensions; k++) {
+                                               sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
+                                       }
                                        
-                                       dists[i][j] = sqrt((firstDim + secondDim));
+                                       dists[i][j] = sqrt(sum);
                                        dists[j][i] = dists[i][j];
                                }
                        }
                        
-               }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
+               }
+               
+               return dists;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//returns groups by dimensions from dimensions by groups
+vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
+       try {
+               //make square matrix
+               vector< vector<double> > dists; dists.resize(axes[0].size());
+               for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
+               
+               if (axes.size() == 1) { //one dimension calc = abs(x-y)
                        
                        for (int i = 0; i < dists.size(); i++) {
                                
                                if (m->control_pressed) { return dists; }
                                
                                for (int j = 0; j < i; j++) {
-                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
-                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
-                                       double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
+                                       dists[i][j] = abs(axes[0][i] - axes[0][j]);
+                                       dists[j][i] = dists[i][j];
+                               }
+                       }
+                       
+               }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
+                       
+                       for (int i = 0; i < dists[0].size(); i++) {
+                               
+                               if (m->control_pressed) { return dists; }
+                               
+                               for (int j = 0; j < i; j++) {
+                                       double sum = 0.0;
+                                       for (int k = 0; k < axes.size(); k++) {
+                                               sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
+                                       }
                                        
-                                       dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
+                                       dists[i][j] = sqrt(sum);
                                        dists[j][i] = dists[i][j];
                                }
                        }
                        
-               }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+               }
                
                return dists;
        }
@@ -293,54 +359,367 @@ vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vecto
        }
 }
 /*********************************************************************************************************************************/
+//assumes both matrices are square and the same size
 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
        try {
                
                //find average for - X
-               vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
+               int count = 0;
+               float averageEuclid = 0.0; 
                for (int i = 0; i < euclidDists.size(); i++) {
-                       for (int j = 0; j < euclidDists[i].size(); j++) {
-                               averageEuclid[i] += euclidDists[i][j];  
+                       for (int j = 0; j < i; j++) {
+                               averageEuclid += euclidDists[i][j];  
+                               count++;
                        }
                }
-               for (int i = 0; i < averageEuclid.size(); i++) {  averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size();   }
-               
+               averageEuclid = averageEuclid / (float) count;   
+                       
                //find average for - Y
-               vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
+               count = 0;
+               float averageUser = 0.0; 
                for (int i = 0; i < userDists.size(); i++) {
-                       for (int j = 0; j < userDists[i].size(); j++) {
-                               averageUser[i] += userDists[i][j];  
+                       for (int j = 0; j < i; j++) {
+                               averageUser += userDists[i][j]; 
+                               count++;
                        }
                }
-               for (int i = 0; i < averageUser.size(); i++) {  averageUser[i] = averageUser[i] / (float) userDists.size();  }
-               
+               averageUser = averageUser / (float) count;  
+
                double numerator = 0.0;
                double denomTerm1 = 0.0;
                double denomTerm2 = 0.0;
                
                for (int i = 0; i < euclidDists.size(); i++) {
                        
-                       for (int k = 0; k < i; k++) {
+                       for (int k = 0; k < i; k++) { //just lt dists
                                
                                float Yi = userDists[i][k];
                                float Xi = euclidDists[i][k];
                                
-                               numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
-                               denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
-                               denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
+                               numerator += ((Xi - averageEuclid) * (Yi - averageUser));
+                               denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid));
+                               denomTerm2 += ((Yi - averageUser) * (Yi - averageUser));
                        }
                }
                
                double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
                double r = numerator / denom;
                
+               //divide by zero error
+               if (isnan(r) || isinf(r)) { r = 0.0; }
+               
                return r;
+               
        }
        catch(exception& e) {
-               m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
+               m->errorOut(e, "LinearAlgebra", "calcPearson");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//assumes both matrices are square and the same size
+double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
+       try {
+               double r; 
+               
+               //format data
+               map<float, int> tableX; 
+               map<float, int>::iterator itTable;
+               vector<spearmanRank> scores; 
+               
+               for (int i = 0; i < euclidDists.size(); i++) {
+                       for (int j = 0; j < i; j++) {
+                               spearmanRank member(toString(scores.size()), euclidDists[i][j]);
+                               scores.push_back(member);  
+                               
+                               //count number of repeats
+                               itTable = tableX.find(euclidDists[i][j]);
+                               if (itTable == tableX.end()) { 
+                                       tableX[euclidDists[i][j]] = 1;
+                               }else {
+                                       tableX[euclidDists[i][j]]++;
+                               }
+                       }
+               }
+               
+               //sort scores
+               sort(scores.begin(), scores.end(), compareSpearman); 
+
+               //calc LX
+               double Lx = 0.0; 
+               for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) {
+                       double tx = (double) itTable->second;
+                       Lx += ((pow(tx, 3.0) - tx) / 12.0);
+               }
+               
+               //find ranks of xi
+               map<string, float> rankEuclid;
+               vector<spearmanRank> ties;
+               int rankTotal = 0;
+               for (int j = 0; j < scores.size(); j++) {
+                       rankTotal += (j+1);
+                       ties.push_back(scores[j]);
+                       
+                       if (j != (scores.size()-1)) { // you are not the last so you can look ahead
+                               if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
+                                       
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankEuclid[ties[k].name] = thisrank;
+                                       }
+                                       ties.clear();
+                                       rankTotal = 0;
+                               }
+                       }else { // you are the last one
+                               
+                               for (int k = 0; k < ties.size(); k++) {
+                                       float thisrank = rankTotal / (float) ties.size();
+                                       rankEuclid[ties[k].name] = thisrank;
+                               }
+                       }
+               }
+               
+               
+               //format data
+               map<float, int> tableY; 
+               scores.clear(); 
+               
+               for (int i = 0; i < userDists.size(); i++) {
+                       for (int j = 0; j < i; j++) {
+                               spearmanRank member(toString(scores.size()), userDists[i][j]);
+                               scores.push_back(member);  
+                               
+                               //count number of repeats
+                               itTable = tableY.find(userDists[i][j]);
+                               if (itTable == tableY.end()) { 
+                                       tableY[userDists[i][j]] = 1;
+                               }else {
+                                       tableY[userDists[i][j]]++;
+                               }
+                       }
+               }
+               
+               //sort scores
+               sort(scores.begin(), scores.end(), compareSpearman); 
+               
+               //calc LX
+               double Ly = 0.0; 
+               for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
+                       double ty = (double) itTable->second;
+                       Ly += ((pow(ty, 3.0) - ty) / 12.0);
+               }
+               
+               //find ranks of yi
+               map<string, float> rankUser;
+               ties.clear();
+               rankTotal = 0;
+               for (int j = 0; j < scores.size(); j++) {
+                       rankTotal += (j+1);
+                       ties.push_back(scores[j]);
+                       
+                       if (j != (scores.size()-1)) { // you are not the last so you can look ahead
+                               if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
+                                       
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankUser[ties[k].name] = thisrank;
+                                       }
+                                       ties.clear();
+                                       rankTotal = 0;
+                               }
+                       }else { // you are the last one
+                               
+                               for (int k = 0; k < ties.size(); k++) {
+                                       float thisrank = rankTotal / (float) ties.size();
+                                       rankUser[ties[k].name] = thisrank;
+                               }
+                       }
+               }
+               
+                       
+               double di = 0.0;        
+               int count = 0;
+               for (int i = 0; i < userDists.size(); i++) {
+                       for (int j = 0; j < i; j++) {
+                       
+                               float xi = rankEuclid[toString(count)];
+                               float yi = rankUser[toString(count)];
+                       
+                               di += ((xi - yi) * (xi - yi));
+                               
+                               count++;
+                       }
+               }
+               
+               double n = (double) count;
+               
+               double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx;
+               double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
+               
+               r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
+               
+               //divide by zero error
+               if (isnan(r) || isinf(r)) { r = 0.0; }
+       
+               return r;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcSpearman");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+//assumes both matrices are square and the same size
+double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
+       try {
+               double r;
+               
+               //format data
+               vector<spearmanRank> scores; 
+               for (int i = 0; i < euclidDists.size(); i++) {
+                       for (int j = 0; j < i; j++) {
+                               spearmanRank member(toString(scores.size()), euclidDists[i][j]);
+                               scores.push_back(member);
+                       }
+               }
+                       
+               //sort scores
+               sort(scores.begin(), scores.end(), compareSpearman);    
+               
+               //find ranks of xi
+               map<string, float> rankEuclid;
+               vector<spearmanRank> ties;
+               int rankTotal = 0;
+               for (int j = 0; j < scores.size(); j++) {
+                       rankTotal += (j+1);
+                       ties.push_back(scores[j]);
+                       
+                       if (j != (scores.size()-1)) { // you are not the last so you can look ahead
+                               if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue
+                                       
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankEuclid[ties[k].name] = thisrank;
+                                       }
+                                       ties.clear();
+                                       rankTotal = 0;
+                               }
+                       }else { // you are the last one
+                               
+                               for (int k = 0; k < ties.size(); k++) {
+                                       float thisrank = rankTotal / (float) ties.size();
+                                       rankEuclid[ties[k].name] = thisrank;
+                               }
+                       }
+               }
+               
+               vector<spearmanRank> scoresUser; 
+               for (int i = 0; i < userDists.size(); i++) {
+                       for (int j = 0; j < i; j++) {
+                               spearmanRank member(toString(scoresUser.size()), userDists[i][j]);
+                               scoresUser.push_back(member);  
+                       }
+               }
+               
+               //sort scores
+               sort(scoresUser.begin(), scoresUser.end(), compareSpearman);    
+               
+               //find ranks of yi
+               map<string, float> rankUser;
+               ties.clear();
+               rankTotal = 0;
+               for (int j = 0; j < scoresUser.size(); j++) {
+                       rankTotal += (j+1);
+                       ties.push_back(scoresUser[j]);
+                       
+                       if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead
+                               if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue
+                                       
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankUser[ties[k].name] = thisrank;
+                                       }
+                                       ties.clear();
+                                       rankTotal = 0;
+                               }
+                       }else { // you are the last one
+                               
+                               for (int k = 0; k < ties.size(); k++) {
+                                       float thisrank = rankTotal / (float) ties.size();
+                                       rankUser[ties[k].name] = thisrank;
+                               }
+                       }
+               }
+               
+               int numCoor = 0;
+               int numDisCoor = 0;
+               
+               //order user ranks
+               vector<spearmanRank> user; 
+               for (int l = 0; l < scores.size(); l++) {   
+                       spearmanRank member(scores[l].name, rankUser[scores[l].name]);
+                       user.push_back(member);
+               }
+                               
+               int count = 0;
+               for (int l = 0; l < scores.size(); l++) {
+                                       
+                       int numWithHigherRank = 0;
+                       int numWithLowerRank = 0;
+                       float thisrank = user[l].score;
+                                       
+                       for (int u = l+1; u < scores.size(); u++) {
+                               if (user[u].score > thisrank) { numWithHigherRank++; }
+                               else if (user[u].score < thisrank) { numWithLowerRank++; }
+                               count++;
+                       }
+                                       
+                       numCoor += numWithHigherRank;
+                       numDisCoor += numWithLowerRank;
+               }
+                               
+               r = (numCoor - numDisCoor) / (float) count;
+               
+               //divide by zero error
+               if (isnan(r) || isinf(r)) { r = 0.0; }
+               
+               return r;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcKendall");
                exit(1);
        }
 }
+
 /*********************************************************************************************************************************/
 
+vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
 
+       int numSamples = relAbundData.size();
+       int numOTUs = relAbundData[0].size();
+       
+       vector<vector<double> > dMatrix(numSamples);
+       for(int i=0;i<numSamples;i++){
+               dMatrix[i].resize(numSamples);
+       }
+       
+       for(int i=0;i<numSamples;i++){
+               for(int j=0;j<numSamples;j++){
+                       
+                       double d = 0;
+                       for(int k=0;k<numOTUs;k++){
+                               d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
+                       }
+                       dMatrix[i][j] = pow(d, 0.50000);
+                       dMatrix[j][i] = dMatrix[i][j];
+                       
+               }
+       }
+       return dMatrix;
+       
+}
+
+/*********************************************************************************************************************************/