]> git.donarmstrong.com Git - mothur.git/blobdiff - corraxescommand.cpp
removed parse.sff command and made its functionality part of sffinfo command. fixed...
[mothur.git] / corraxescommand.cpp
index 322b7f9d067fc3c9bd1bd8724c8ad43d7b4c3135..429ef141b54b621bb9bff43904e78038c31df438 100644 (file)
@@ -9,10 +9,15 @@
 
 #include "corraxescommand.h"
 
+//********************************************************************************************************************
+//sorts highes to lowest
+inline bool compareSpearman(spearmanRank left, spearmanRank right){
+       return (left.score > right.score);      
+} 
 //**********************************************************************************************************************
 vector<string> CorrAxesCommand::getValidParameters(){  
        try {
-               string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metastats","outputdir","inputdir"};
+               string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -69,7 +74,7 @@ CorrAxesCommand::CorrAxesCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metastats","outputdir","inputdir"};
+                       string Array[] =  {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -114,6 +119,14 @@ CorrAxesCommand::CorrAxesCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
                                }
+                               
+                               it = parameters.find("metadata");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
+                               }
                        }
                        
                        
@@ -132,6 +145,10 @@ CorrAxesCommand::CorrAxesCommand(string option)  {
                        else if (relabundfile == "not found") { relabundfile = ""; }
                        else { inputFileName = relabundfile; }
                        
+                       metadatafile = validParameter.validFile(parameters, "metadata", true);
+                       if (metadatafile == "not open") { abort = true; }
+                       else if (metadatafile == "not found") { metadatafile = ""; }
+                       
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = "";  pickedGroups = false;  }
                        else { 
@@ -168,7 +185,17 @@ CorrAxesCommand::CorrAxesCommand(string option)  {
 
 void CorrAxesCommand::help(){
        try {
-
+               m->mothurOut("The corr.axes command reads a shared or relabund file as well as a pcoa file and calculates the correlation coefficient.\n");
+               m->mothurOut("The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared or relabund and axes parameters are required.  If shared is given the relative abundance is calculated.\n");
+               m->mothurOut("The metadata parameter.....\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n");
+               m->mothurOut("The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n");
+               m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n");
+               m->mothurOut("The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n");
+               m->mothurOut("The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n");
+               m->mothurOut("Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n");
+               m->mothurOut("The corr.axes command outputs a .corr.axes file.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
        }
        catch(exception& e) {
                m->errorOut(e, "CorrAxesCommand", "help");      
@@ -233,15 +260,25 @@ int CorrAxesCommand::execute(){
                
                if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
                
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "corr.axes";
+               /*************************************************************************************/
+               // calc the r values                                                                                                                            //
+               /************************************************************************************/
+               
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
                outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
                ofstream out;
                m->openOutputFile(outputFileName, out);
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               //output headings
+               out << "OTU\t";
+               for (int i = 0; i < numaxes; i++) { out << "axis" << (i+1) << '\t'; }
+               out << endl;
                
                if (method == "pearson")                {  calcPearson(axes, out);      }
-               //else if (method == "spearman")        {  calcSpearman(axes, out); }
-               //else if (method == "kendall") {  calcKendal(axes, out);       }
-               //else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
+               else if (method == "spearman")  {  calcSpearman(axes, out); }
+               else if (method == "kendall")   {  calcKendall(axes, out);      }
+               else { m->mothurOut("[ERROR]: Invalid method."); m->mothurOutEndLine(); }
                
                out.close();
                for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
@@ -278,6 +315,8 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
           //for each otu
           for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                   
+                  out << i+1 << '\t';
+                  
                   //find the averages this otu - Y
                   float sumOtu = 0.0;
                   for (int j = 0; j < lookupFloat.size(); j++) {
@@ -285,7 +324,30 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                   }
                   float Ybar = sumOtu / (float) lookupFloat.size();
                   
-                  //find the average
+                  //find r value for each axis
+                  for (int k = 0; k < averageAxes.size(); k++) {
+                          
+                          double r = 0.0;
+                          double numerator = 0.0;
+                          double denomTerm1 = 0.0;
+                          double denomTerm2 = 0.0;
+                          for (int j = 0; j < lookupFloat.size(); j++) {
+                                  float Yi = lookupFloat[j]->getAbundance(i);
+                                  float Xi = axes[lookupFloat[j]->getGroup()][k];
+                                  
+                                  numerator += ((Xi - averageAxes[k]) * (Yi - Ybar));
+                                  denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k]));
+                                  denomTerm2 += ((Yi - Ybar) * (Yi - Ybar));
+                          }
+                          
+                          double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
+                          
+                          r = numerator / denom;
+                          
+                          out << r << '\t'; 
+                  }
+                  
+                  out << endl;
           }
                   
           return 0;
@@ -296,6 +358,242 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
    }
 }
 //**********************************************************************************************************************
+int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
+       try {
+               
+               //format data
+               vector< vector<spearmanRank> > scores; scores.resize(numaxes);
+               for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
+                       vector<float> temp = it->second;
+                       for (int i = 0; i < temp.size(); i++) {
+                               spearmanRank member(it->first, temp[i]);
+                               scores[i].push_back(member);  
+                       }
+               }
+               
+               //sort each axis
+               for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
+               
+               //find ranks of xi in each axis
+               map<string, vector<float> > rankAxes;
+               for (int i = 0; i < numaxes; i++) {
+                       
+                       vector<spearmanRank> ties;
+                       int rankTotal = 0;
+                       for (int j = 0; j < scores[i].size(); j++) {
+                               rankTotal += j;
+                               ties.push_back(scores[i][j]);
+                               
+                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                                       if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
+                                               for (int k = 0; k < ties.size(); k++) {
+                                                       float thisrank = rankTotal / (float) ties.size();
+                                                       rankAxes[ties[k].name].push_back(thisrank);
+                                               }
+                                               ties.clear();
+                                               rankTotal = 0;
+                                       }
+                               }else { // you are the last one
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankAxes[ties[k].name].push_back(thisrank);
+                                       }
+                               }
+                       }
+               }
+               
+                               
+               
+               //for each otu
+               for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
+                       
+                       out << i+1 << '\t';
+                       
+                       //find the ranks of this otu - Y
+                       vector<spearmanRank> otuScores;
+                       for (int j = 0; j < lookupFloat.size(); j++) {
+                               spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
+                               otuScores.push_back(member);
+                       }
+                       
+                       sort(otuScores.begin(), otuScores.end(), compareSpearman);
+                       
+                       map<string, float> rankOtus;
+                       vector<spearmanRank> ties;
+                       int rankTotal = 0;
+                       for (int j = 0; j < otuScores.size(); j++) {
+                               rankTotal += j;
+                               ties.push_back(otuScores[j]);
+                               
+                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                                       if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
+                                               for (int k = 0; k < ties.size(); k++) {
+                                                       float thisrank = rankTotal / (float) ties.size();
+                                                       rankOtus[ties[k].name] = thisrank;
+                                               }
+                                               ties.clear();
+                                               rankTotal = 0;
+                                       }
+                               }else { // you are the last one
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankOtus[ties[k].name] = thisrank;
+                                       }
+                               }
+                       }
+                       
+                       //calc spearman ranks for each axis for this otu
+                       for (int j = 0; j < numaxes; j++) {
+                               
+                               double di = 0.0;
+                               for (int k = 0; k < lookupFloat.size(); k++) {
+                                       
+                                       float xi = rankAxes[lookupFloat[k]->getGroup()][j];
+                                       float yi = rankOtus[lookupFloat[k]->getGroup()];
+                                       
+                                       di += ((xi - yi) * (xi - yi));
+                               }
+                               
+                               int n = lookupFloat.size();
+                               double p = 1.0 - ((6 * di) / (float) (n * ((n*n) - 1)));
+                               
+                               out << p << '\t';
+                       }
+                       
+                       
+                       out << endl;
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CorrAxesCommand", "calcSpearman");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
+       try {
+               
+               //format data
+               vector< vector<spearmanRank> > scores; scores.resize(numaxes);
+               for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
+                       vector<float> temp = it->second;
+                       for (int i = 0; i < temp.size(); i++) {
+                               spearmanRank member(it->first, temp[i]);
+                               scores[i].push_back(member);  
+                       }
+               }
+               
+               //sort each axis
+               for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
+               
+               //convert scores to ranks of xi in each axis
+               for (int i = 0; i < numaxes; i++) {
+                       
+                       vector<spearmanRank*> ties;
+                       int rankTotal = 0;
+                       for (int j = 0; j < scores[i].size(); j++) {
+                               rankTotal += j;
+                               ties.push_back(&(scores[i][j]));
+                               
+                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                                       if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
+                                               for (int k = 0; k < ties.size(); k++) {
+                                                       float thisrank = rankTotal / (float) ties.size();
+                                                       (*ties[k]).score = thisrank;
+                                               }
+                                               ties.clear();
+                                               rankTotal = 0;
+                                       }
+                               }else { // you are the last one
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               (*ties[k]).score = thisrank;
+                                       }
+                               }
+                       }
+               }
+               
+               //for each otu
+               for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
+               
+                       out << i+1 << '\t';
+                       
+                       //find the ranks of this otu - Y
+                       vector<spearmanRank> otuScores;
+                       for (int j = 0; j < lookupFloat.size(); j++) {
+                               spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
+                               otuScores.push_back(member);
+                       }
+                                               
+                       sort(otuScores.begin(), otuScores.end(), compareSpearman);
+                       
+                       map<string, float> rankOtus;
+                       vector<spearmanRank> ties;
+                       int rankTotal = 0;
+                       for (int j = 0; j < otuScores.size(); j++) {
+                               rankTotal += j;
+                               ties.push_back(otuScores[j]);
+                               
+                               if (j != scores.size()-1) { // you are not the last so you can look ahead
+                                       if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue
+                                               for (int k = 0; k < ties.size(); k++) {
+                                                       float thisrank = rankTotal / (float) ties.size();
+                                                       rankOtus[ties[k].name] = thisrank;
+                                               }
+                                               ties.clear();
+                                               rankTotal = 0;
+                                       }
+                               }else { // you are the last one
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               rankOtus[ties[k].name] = thisrank;
+                                       }
+                               }
+                       }
+                       
+                       //calc spearman ranks for each axis for this otu
+                       for (int j = 0; j < numaxes; j++) {
+                       
+                               int P = 0;
+                               //assemble otus ranks in same order as axis ranks
+                               vector<spearmanRank> otus; 
+                               for (int l = 0; l < scores[j].size(); l++) {   
+                                       spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
+                                       otus.push_back(member);
+                               }
+                               
+                               for (int l = 0; l < scores[j].size(); l++) {
+                                       
+                                       int numWithHigherRank = 0;
+                                       float thisrank = otus[l].score;
+                                       
+                                       for (int u = l; u < scores[j].size(); u++) {
+                                               if (otus[u].score > thisrank) { numWithHigherRank++; }
+                                       }
+                                       
+                                       P += numWithHigherRank;
+                               }
+                               
+                               int n = lookupFloat.size();
+                               
+                               double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
+                               
+                               out << p << '\t';
+                       }
+                       
+                       out << endl;
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CorrAxesCommand", "calcKendall");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int CorrAxesCommand::getShared(){
        try {
                InputData* input = new InputData(sharedfile, "sharedfile");
@@ -547,7 +845,8 @@ map<string, vector<float> > CorrAxesCommand::readAxes(){
                                headerLine = headerLine.substr(pos+4);
                        }else { done = true; }
                }
-               cout << "count = " << count << endl;    
+               
+               if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
                
                while (!in.eof()) {