]> git.donarmstrong.com Git - mothur.git/blobdiff - corraxescommand.cpp
working on kendall
[mothur.git] / corraxescommand.cpp
index eac1f3505b1bca098ab01d100a513d24bd5c9d29..64ac9b931545f853ae2b960259d60edce06564eb 100644 (file)
 #include "sharedutilities.h"
 
 //********************************************************************************************************************
-//sorts highes to lowest
+//sorts highest to lowest
 inline bool compareSpearman(spearmanRank left, spearmanRank right){
        return (left.score > right.score);      
 } 
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
+       return (left.score < right.score);      
+} 
 //**********************************************************************************************************************
 vector<string> CorrAxesCommand::getValidParameters(){  
        try {
@@ -382,15 +387,50 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
        try {
                
-               bool hasTies = false;
+               /*axes.clear();
+               axes["1a"].push_back(1);
+               axes["1b"].push_back(1);
+               axes["1c"].push_back(1);
+               axes["2a"].push_back(2);
+               axes["2b"].push_back(2);
+               axes["2c"].push_back(2);
+               axes["3a"].push_back(3);
+               axes["3b"].push_back(3);
+               axes["3c"].push_back(3);
+               axes["4a"].push_back(4);
+               axes["4b"].push_back(4);
+               axes["4c"].push_back(4);
+               axes["5a"].push_back(5);
+               axes["5b"].push_back(5);
+               axes["5c"].push_back(5);*/
                
                //format data
+               vector< map<float, int> > tableX; tableX.resize(numaxes);
+               map<float, int>::iterator itTable;
                vector< vector<spearmanRank> > scores; scores.resize(numaxes);
                for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
                        vector<float> temp = it->second;
                        for (int i = 0; i < temp.size(); i++) {
                                spearmanRank member(it->first, temp[i]);
                                scores[i].push_back(member);  
+                               
+                               //count number of repeats
+                               itTable = tableX[i].find(temp[i]);
+                               if (itTable == tableX[i].end()) { 
+                                       tableX[i][temp[i]] = 1;
+                               }else {
+                                       tableX[i][temp[i]]++;
+                               }
+                       }
+               }
+               
+               //calc LX
+               //for each axis
+               vector<double> Lx; Lx.resize(numaxes, 0.0);
+               for (int i = 0; i < numaxes; i++) {
+                       for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) {
+                               double tx = (double) itTable->second;
+                               Lx[i] += ((pow(tx, 3.0) - tx) / 12.0);
                        }
                }
                
@@ -398,20 +438,18 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
                
                //find ranks of xi in each axis
-               vector<float> averageX; averageX.resize(numaxes, 0.0);
                map<string, vector<float> > rankAxes;
                for (int i = 0; i < numaxes; i++) {
                        
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < scores[i].size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(scores[i][j]);
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead
                                        if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
-                                               if (ties.size() > 1) { hasTies = true; }
-                                               averageX[i] += rankTotal;
+
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankAxes[ties[k].name].push_back(thisrank);
@@ -420,20 +458,17 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                rankTotal = 0;
                                        }
                                }else { // you are the last one
-                                       if (ties.size() > 1) { hasTies = true; }
-                                       averageX[i] += rankTotal;
+                                       
                                        for (int k = 0; k < ties.size(); k++) {
                                                float thisrank = rankTotal / (float) ties.size();
                                                rankAxes[ties[k].name].push_back(thisrank);
+                                               
                                        }
                                }
                        }
-                       
-                       averageX[i] /= (float) scores[i].size();
                }
                
                                
-               
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                        
@@ -442,25 +477,39 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        
                        //find the ranks of this otu - Y
                        vector<spearmanRank> otuScores;
+                       map<float, int> tableY;
                        for (int j = 0; j < lookupFloat.size(); j++) {
                                spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
                                otuScores.push_back(member);
+                               
+                               itTable = tableY.find(member.score);
+                               if (itTable == tableY.end()) { 
+                                       tableY[member.score] = 1;
+                               }else {
+                                       tableY[member.score]++;
+                               }
+                               
+                       }
+                       
+                       //calc Ly
+                       double Ly = 0.0;
+                       for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) {
+                               double ty = (double) itTable->second;
+                               Ly += ((pow(ty, 3.0) - ty) / 12.0);
                        }
                        
                        sort(otuScores.begin(), otuScores.end(), compareSpearman);
                        
                        map<string, float> rankOtus;
                        vector<spearmanRank> ties;
-                       float averageY = 0.0;
                        int rankTotal = 0;
                        for (int j = 0; j < otuScores.size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(otuScores[j]);
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead
                                        if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
-                                               if (ties.size() > 1) { hasTies = true; }
-                                               averageY += rankTotal;
+                                               
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankOtus[ties[k].name] = thisrank;
@@ -469,53 +518,46 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                rankTotal = 0;
                                        }
                                }else { // you are the last one
-                                       if (ties.size() > 1) { hasTies = true; }
-                                       averageY += rankTotal;
+                                       
                                        for (int k = 0; k < ties.size(); k++) {
                                                float thisrank = rankTotal / (float) ties.size();
                                                rankOtus[ties[k].name] = thisrank;
                                        }
                                }
                        }
-                       
-
-                       averageY /= (float) otuScores.size();
-                       //hasTies = false;              
+                       vector<double> pValues(numaxes);        
 
                        //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
                                
                                double di = 0.0;
-                               double denom1 = 0.0;
-                               double denom2 = 0.0;
                                for (int k = 0; k < lookupFloat.size(); k++) {
                                        
                                        float xi = rankAxes[lookupFloat[k]->getGroup()][j];
                                        float yi = rankOtus[lookupFloat[k]->getGroup()];
                                        
-                                       if (hasTies) {
-                                               di += ((xi - averageX[j]) * (yi - averageY));
-                                               denom1 += ((xi - averageX[j]) * (xi - averageX[j]));
-                                               denom2 += ((yi - averageY) * (yi - averageY));
-                                       }else {
-                                               di += ((xi - yi) * (xi - yi));
-                                       }
+                                       di += ((xi - yi) * (xi - yi));
                                }
                                
                                double p = 0.0;
                                
-                               if (hasTies) {
-                                       double denom = sqrt((denom1 * denom2));
-                                       p = di / denom;
-                               }else {
-                                       int n = lookupFloat.size();
-                                       p = 1.0 - ((6 * di) / (float) (n * ((n*n) - 1)));
-                               }
+                               double n = (double) lookupFloat.size();
+                               
+                               double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j];
+                               double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly;
+                               
+                               p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
                                
                                out  << '\t' << p;
+                               
+                               pValues[j] = p;
                        }
 
-                       out << endl;
+                       double sum = 0;
+                       for(int k=0;k<numaxes;k++){
+                               sum += pValues[k] * pValues[k];
+                       }
+                       out << '\t' << sqrt(sum) << endl;
                }
                
                return 0;
@@ -529,6 +571,25 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
        try {
                
+               numaxes = 1;
+               axes.clear();
+                axes["1a"].push_back(1);
+                axes["1b"].push_back(1);
+                axes["1c"].push_back(1);
+                axes["2a"].push_back(2);
+                axes["2b"].push_back(2);
+                axes["2c"].push_back(2);
+                axes["3a"].push_back(3);
+                axes["3b"].push_back(3);
+                axes["3c"].push_back(3);
+                axes["4a"].push_back(4);
+                axes["4b"].push_back(4);
+                axes["4c"].push_back(4);
+                axes["5a"].push_back(5);
+                axes["5b"].push_back(5);
+                axes["5c"].push_back(5);
+               
+               
                //format data
                vector< vector<spearmanRank> > scores; scores.resize(numaxes);
                for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
@@ -548,10 +609,10 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                        vector<spearmanRank*> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < scores[i].size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(&(scores[i][j]));
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != scores[i].size()-1) { // you are not the last so you can look ahead
                                        if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
@@ -568,6 +629,32 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                }
                        }
                }
+               cout << "axes ranks = ";
+               for(int i = 0; i < scores[0].size(); i++) {  cout << scores[0][i].score << '\t'; }
+               cout << endl;   
+                lookupFloat.clear();
+                lookupFloat.resize(15);
+                for (int i = 0; i < lookupFloat.size(); i++) {
+                lookupFloat[i] = new SharedRAbundFloatVector();
+                }
+                lookupFloat[0]->push_back(0.2288227, "1a");  lookupFloat[0]->setGroup("1a");
+                lookupFloat[1]->push_back(0.7394062, "1b");  lookupFloat[1]->setGroup("1b");
+                lookupFloat[2]->push_back(0.4521187, "1c");  lookupFloat[2]->setGroup("1c");
+                lookupFloat[3]->push_back(0.1598630, "2a");  lookupFloat[3]->setGroup("2a");
+                lookupFloat[4]->push_back(0.09588156, "2b");  lookupFloat[4]->setGroup("2b");
+                
+                lookupFloat[5]->push_back(0.933174, "2c");  lookupFloat[5]->setGroup("2c");
+                lookupFloat[6]->push_back(0.3958304, "3a");  lookupFloat[6]->setGroup("3a");;
+                lookupFloat[7]->push_back(0.2364419, "3b");  lookupFloat[7]->setGroup("3b");
+                lookupFloat[8]->push_back(0.1697712, "3c");  lookupFloat[8]->setGroup("3c");
+                lookupFloat[9]->push_back(0.4077173, "4a");  lookupFloat[9]->setGroup("4a");
+                
+                lookupFloat[10]->push_back(0.6116547, "4b");  lookupFloat[10]->setGroup("4b");
+                lookupFloat[11]->push_back(0.9374322, "4c");  lookupFloat[11]->setGroup("4c");
+                lookupFloat[12]->push_back(0.852184, "5a");  lookupFloat[12]->setGroup("5a");
+                lookupFloat[13]->push_back(0.845094, "5b");  lookupFloat[13]->setGroup("5b");
+                lookupFloat[14]->push_back(0.5795778, "5c");  lookupFloat[14]->setGroup("5c");
+               
                
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
@@ -588,10 +675,10 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < otuScores.size(); j++) {
-                               rankTotal += j;
+                               rankTotal += (j+1);
                                ties.push_back(otuScores[j]);
                                
-                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                               if (j != otuScores.size()-1) { // you are not the last so you can look ahead
                                        if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
@@ -607,35 +694,73 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                        }
                                }
                        }
+                       
+                       
                        vector<double> pValues(numaxes);
                        
                        //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
                        
-                               int P = 0;
-                               //assemble otus ranks in same order as axis ranks
+                               int numCoor = 0;
+                               int numDisCoor = 0;
+                               
+                               //assemble otus ranks in same order as axis ranks, if ties, put in the best order possible
+                               //NOTE: after this ordering the scores[j] indexes may not match the otus indexes within tied sections
+                               //since we do not use the axes ranks except to order the otu ranks, I did not take the time to reorder the axes ranks
                                vector<spearmanRank> otus; 
+                               vector<spearmanRank> otusTemp;
                                for (int l = 0; l < scores[j].size(); l++) {   
                                        spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
-                                       otus.push_back(member);
+                                       otusTemp.push_back(member);
+                                       
+                                       if (l != (scores[j].size()-1)) { // you are not the last so you can look ahead
+                                               if (scores[j][l].score != scores[j][l+1].score) { // you are done with ties, order them and continue
+                                                       //order otus within tied section in the best way possible to make coor pairs
+                                                       sort(otusTemp.begin(), otusTemp.end(), compareSpearmanReverse);
+                                                       
+                                                       //save order in otus
+                                                       for (int h = 0; h < otusTemp.size(); h++) { ; otus.push_back(otusTemp[h]); }
+                                                       
+                                                       otusTemp.clear();
+                                               }
+                                       }else { // you are the last one
+                                               //order otus within tied section in the best way possible to make coor pairs
+                                               sort(otusTemp.begin(), otusTemp.end(), compareSpearmanReverse);
+                                       
+                                               //save order in otus
+                                               for (int h = 0; h < otusTemp.size(); h++) {  otus.push_back(otusTemp[h]); }
+                                       }
                                }
                                
+                               cout << "otu ranks = ";
+                               for (int h = 0; h < otus.size(); h++ ) { cout << otus[h].score << '\t'; }
+                               cout << endl;
+                               
+                               int count = 0;
                                for (int l = 0; l < scores[j].size(); l++) {
                                        
                                        int numWithHigherRank = 0;
+                                       int numWithLowerRank = 0;
                                        float thisrank = otus[l].score;
                                        
                                        for (int u = l; u < scores[j].size(); u++) {
                                                if (otus[u].score > thisrank) { numWithHigherRank++; }
+                                               else if (otus[u].score < thisrank) { numWithLowerRank++; }
+                                               count++;
                                        }
                                        
-                                       P += numWithHigherRank;
+                                       numCoor += numWithHigherRank;
+                                       numDisCoor += numWithLowerRank;
                                }
                                
-                               int n = lookupFloat.size();
-                               
-                               double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
+                               //sample size
+                               //int n = lookupFloat.size();
+                               //comparing to yourself
+                               count -= lookupFloat.size();
                                
+                               //double p = ( (4 * P) / (float) ((n * (n - 1)) - numTies) ) - 1.0;
+                               double p = (numCoor - numDisCoor) / (float) count;
+                               cout << "numCoor = " << numCoor << " numDisCoor = " << numDisCoor << " p = " << p << " count = " << count << endl;
                                out << '\t' << p;
                                pValues[j] = p;