]> git.donarmstrong.com Git - mothur.git/blobdiff - corraxescommand.cpp
fixed kendall method in corr.axes
[mothur.git] / corraxescommand.cpp
index 8ebd22c50144bf60ee5c7ff0a6f608745707a045..26cdcda0178bcfdbfc8c89fbbbced8866edb796c 100644 (file)
 #include "sharedutilities.h"
 
 //********************************************************************************************************************
-//sorts highes to lowest
+//sorts highest to lowest
 inline bool compareSpearman(spearmanRank left, spearmanRank right){
        return (left.score > right.score);      
 } 
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
+       return (left.score < right.score);      
+} 
 //**********************************************************************************************************************
 vector<string> CorrAxesCommand::getValidParameters(){  
        try {
@@ -283,11 +288,11 @@ int CorrAxesCommand::execute(){
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
                //output headings
-               if (metadatafile == "") {  out << "OTU\t";      }
-               else {  out << "Feature\t";                                             }
+               if (metadatafile == "") {  out << "OTU";        }
+               else {  out << "Feature";                                               }
 
-               for (int i = 0; i < numaxes; i++) { out << "axis" << (i+1) << '\t'; }
-               out << endl;
+               for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1); }
+               out << "\tlength" << endl;
                
                if (method == "pearson")                {  calcPearson(axes, out);      }
                else if (method == "spearman")  {  calcSpearman(axes, out); }
@@ -329,9 +334,9 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
           //for each otu
           for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                   
-                  if (metadatafile == "") {  out << i+1 << '\t';       }
-                  else {  out << metadataLabels[i] << '\t';            }
-                  
+                  if (metadatafile == "") {  out << i+1;       }
+                  else {  out << metadataLabels[i];            }
+                                  
                   //find the averages this otu - Y
                   float sumOtu = 0.0;
                   for (int j = 0; j < lookupFloat.size(); j++) {
@@ -339,6 +344,8 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                   }
                   float Ybar = sumOtu / (float) lookupFloat.size();
                   
+                  vector<float> rValues(averageAxes.size());
+
                   //find r value for each axis
                   for (int k = 0; k < averageAxes.size(); k++) {
                           
@@ -358,11 +365,15 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                           double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
                           
                           r = numerator / denom;
-                          
-                          out << r << '\t'
+                          rValues[k] = r;
+                          out << '\t' << r
                   }
                   
-                  out << endl;
+                  double sum = 0;
+                  for(int k=0;k<rValues.size();k++){
+                          sum += rValues[k] * rValues[k];
+                  }
+                  out << '\t' << sqrt(sum) << endl;
           }
                   
           return 0;
@@ -377,12 +388,32 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
        try {
                
                //format data
+               vector< map<float, int> > tableX; tableX.resize(numaxes);
+               map<float, int>::iterator itTable;
                vector< vector<spearmanRank> > scores; scores.resize(numaxes);
                for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
                        vector<float> temp = it->second;
                        for (int i = 0; i < temp.size(); i++) {
                                spearmanRank member(it->first, temp[i]);
                                scores[i].push_back(member);  
+                               
+                               //count number of repeats
+                               itTable = tableX[i].find(temp[i]);
+                               if (itTable == tableX[i].end()) { 
+                                       tableX[i][temp[i]] = 1;
+                               }else {
+                                       tableX[i][temp[i]]++;
+                               }
+                       }
+               }
+               
+               //calc LX
+               //for each axis
+               vector<double> Lx; Lx.resize(numaxes, 0.0);
+               for (int i = 0; i < numaxes; i++) {
+                       for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
+                               double tx = (double) itTable->second;
+                               Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
                        }
                }
                
@@ -396,11 +427,12 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < scores[i].size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(scores[i][j]);
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
                                        if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
+
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankAxes[ties[k].name].push_back(thisrank);
@@ -409,27 +441,44 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                rankTotal = 0;
                                        }
                                }else { // you are the last one
+                                       
                                        for (int k = 0; k < ties.size(); k++) {
                                                float thisrank = rankTotal / (float) ties.size();
                                                rankAxes[ties[k].name].push_back(thisrank);
+                                               
                                        }
                                }
                        }
                }
                
                                
-               
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                        
-                       if (metadatafile == "") {  out << i+1 << '\t';  }
-                       else {  out << metadataLabels[i] << '\t';               }
+                       if (metadatafile == "") {  out << i+1;  }
+                       else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
                        vector<spearmanRank> otuScores;
+                       map<float, int> tableY;
                        for (int j = 0; j < lookupFloat.size(); j++) {
                                spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
                                otuScores.push_back(member);
+                               
+                               itTable = tableY.find(member.score);
+                               if (itTable == tableY.end()) { 
+                                       tableY[member.score] = 1;
+                               }else {
+                                       tableY[member.score]++;
+                               }
+                               
+                       }
+                       
+                       //calc Ly
+                       double Ly = 0.0;
+                       for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
+                               double ty = (double) itTable->second;
+                               Ly += ((pow(ty, 3.0) - ty) / 12.0);
                        }
                        
                        sort(otuScores.begin(), otuScores.end(), compareSpearman);
@@ -438,11 +487,12 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < otuScores.size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(otuScores[j]);
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
                                        if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
+                                               
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankOtus[ties[k].name] = thisrank;
@@ -451,13 +501,15 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                rankTotal = 0;
                                        }
                                }else { // you are the last one
+                                       
                                        for (int k = 0; k < ties.size(); k++) {
                                                float thisrank = rankTotal / (float) ties.size();
                                                rankOtus[ties[k].name] = thisrank;
                                        }
                                }
                        }
-                       
+                       vector<double> pValues(numaxes);        
+
                        //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
                                
@@ -470,14 +522,25 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                        di += ((xi - yi) * (xi - yi));
                                }
                                
-                               int n = lookupFloat.size();
-                               double p = 1.0 - ((6 * di) / (float) (n * ((n*n) - 1)));
+                               double p = 0.0;
                                
-                               out << p << '\t';
+                               double n = (double) lookupFloat.size();
+                               
+                               double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
+                               double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
+                               
+                               p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
+                               
+                               out  << '\t' << p;
+                               
+                               pValues[j] = p;
                        }
-                       
-                       
-                       out << endl;
+
+                       double sum = 0;
+                       for(int k=0;k<numaxes;k++){
+                               sum += pValues[k] * pValues[k];
+                       }
+                       out << '\t' << sqrt(sum) << endl;
                }
                
                return 0;
@@ -510,10 +573,10 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                        vector<spearmanRank*> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < scores[i].size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(&(scores[i][j]));
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != scores[i].size()-1) { // you are not the last so you can look ahead
                                        if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
@@ -534,8 +597,8 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                
-                       if (metadatafile == "") {  out << i+1 << '\t';  }
-                       else {  out << metadataLabels[i] << '\t';               }
+                       if (metadatafile == "") {  out << i+1;  }
+                       else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
                        vector<spearmanRank> otuScores;
@@ -550,10 +613,10 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < otuScores.size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(otuScores[j]);
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != otuScores.size()-1) { // you are not the last so you can look ahead
                                        if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
@@ -570,37 +633,54 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                }
                        }
                        
+                       
+                       vector<double> pValues(numaxes);
+                       
                        //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
                        
-                               int P = 0;
-                               //assemble otus ranks in same order as axis ranks
+                               int numCoor = 0;
+                               int numDisCoor = 0;
+                               
                                vector<spearmanRank> otus; 
+                               vector<spearmanRank> otusTemp;
                                for (int l = 0; l < scores[j].size(); l++) {   
                                        spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
                                        otus.push_back(member);
                                }
                                
+                               int count = 0;
                                for (int l = 0; l < scores[j].size(); l++) {
                                        
                                        int numWithHigherRank = 0;
+                                       int numWithLowerRank = 0;
                                        float thisrank = otus[l].score;
                                        
                                        for (int u = l; u < scores[j].size(); u++) {
                                                if (otus[u].score > thisrank) { numWithHigherRank++; }
+                                               else if (otus[u].score < thisrank) { numWithLowerRank++; }
+                                               count++;
                                        }
                                        
-                                       P += numWithHigherRank;
+                                       numCoor += numWithHigherRank;
+                                       numDisCoor += numWithLowerRank;
                                }
                                
-                               int n = lookupFloat.size();
-                               
-                               double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
+                               //comparing to yourself
+                               count -= lookupFloat.size();
                                
-                               out << p << '\t';
+                               double p = (numCoor - numDisCoor) / (float) count;
+
+                               out << '\t' << p;
+                               pValues[j] = p;
+
                        }
                        
-                       out << endl;
+                       double sum = 0;
+                       for(int k=0;k<numaxes;k++){
+                               sum += pValues[k] * pValues[k];
+                       }
+                       out << '\t' << sqrt(sum) << endl;
                }
                
                return 0;