git.donarmstrong.com Git - mothur.git/blobdiff - corraxescommand.cpp
fixes while testing 1.33.0
[mothur.git] / corraxescommand.cpp
index 10669e580998797222fd785b6d80608fd87ba036..72fa03b75edc196aafcaaa4b6ade6f95d1106d48 100644 (file)
@@ -9,20 +9,21 @@
 
 #include "corraxescommand.h"
 #include "sharedutilities.h"
+#include "linearalgebra.h"
 
 //**********************************************************************************************************************
 vector<string> CorrAxesCommand::setParameters(){       
        try {
-               CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paxes);
-               CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
-               CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
-               CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
-               CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "",false,false); parameters.push_back(pnumaxes);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none","corraxes",false,true,true); parameters.push_back(paxes);
+               CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(pshared);
+               CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(prelabund);
+               CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false); parameters.push_back(pmetadata);
+               CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pnumaxes);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "","",false,false); parameters.push_back(pmethod);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -54,13 +55,29 @@ string CorrAxesCommand::getHelpString(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+string CorrAxesCommand::getOutputPattern(string type) {
+    // Maps an output type name to its file-name pattern; "corraxes" is the only type defined here.
+    try {
+        if (type == "corraxes") { return "[filename],[tag],corr.axes"; }
+        
+        // Any other type: log the error, set m->control_pressed, and return an empty pattern.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CorrAxesCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+
 //**********************************************************************************************************************
 CorrAxesCommand::CorrAxesCommand(){    
        try {
                abort = true; calledHelp = true; 
                setParameters();
                vector<string> tempOutNames;
-               outputTypes["corr.axes"] = tempOutNames;
+               outputTypes["corraxes"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
@@ -91,7 +108,7 @@ CorrAxesCommand::CorrAxesCommand(string option)  {
                        }
                        
                        vector<string> tempOutNames;
-                       outputTypes["corr.axes"] = tempOutNames;
+                       outputTypes["corraxes"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -264,9 +281,11 @@ int CorrAxesCommand::execute(){
                /*************************************************************************************/
                // calc the r values                                                                                                                            //
                /************************************************************************************/
-               
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
-               outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName));
+        variables["[tag]"] = method;
+               string outputFileName = getOutputFileName("corraxes", variables);
+               outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);       
                ofstream out;
                m->openOutputFile(outputFileName, out);
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
@@ -304,6 +323,8 @@ int CorrAxesCommand::execute(){
 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
    try {
           
+       LinearAlgebra linear;
+       
           //find average of each axis - X
           vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
           for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
@@ -318,7 +339,7 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
           //for each otu
           for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                   
-                  if (metadatafile == "") {  out << i+1;       }
+                  if (metadatafile == "") {  out << m->currentSharedBinLabels[i];      }
                   else {  out << metadataLabels[i];            }
                                   
                   //find the averages this otu - Y
@@ -355,11 +376,7 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                           rValues[k] = r;
                           out << '\t' << r; 
                
-               //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
-               double temp =  (1- (r*r)) / (double) (lookupFloat.size()-2);
-               temp = sqrt(temp);
-               double sig = r / temp;
-               if (isnan(sig) || isinf(sig)) { sig = 0.0; }
+               double sig = linear.calcPearsonSig(lookupFloat.size(), r);
                
                out << '\t' << sig;
                   }
@@ -382,6 +399,9 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
        try {
                
+        LinearAlgebra linear;
+        vector<double> sf; 
+        
                //format data
                vector< map<float, int> > tableX; tableX.resize(numaxes);
                map<float, int>::iterator itTable;
@@ -421,6 +441,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
+            double sfTemp = 0.0;
                        for (int j = 0; j < scores[i].size(); j++) {
                                rankTotal += (j+1);
                                ties.push_back(scores[i][j]);
@@ -432,6 +453,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankAxes[ties[k].name].push_back(thisrank);
                                                }
+                        int t = ties.size();
+                        sfTemp += (t*t*t-t);
                                                ties.clear();
                                                rankTotal = 0;
                                        }
@@ -444,13 +467,14 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                        }
                                }
                        }
+            sf.push_back(sfTemp);
                }
                
                                
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                        
-                       if (metadatafile == "") {  out << i+1;  }
+                       if (metadatafile == "") {  out << m->currentSharedBinLabels[i]; }
                        else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
@@ -478,6 +502,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        
                        sort(otuScores.begin(), otuScores.end(), compareSpearman);
                        
+            double sg = 0.0;
                        map<string, float> rankOtus;
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
@@ -492,6 +517,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankOtus[ties[k].name] = thisrank;
                                                }
+                        int t = ties.size();
+                        sg += (t*t*t-t);
                                                ties.clear();
                                                rankTotal = 0;
                                        }
@@ -532,12 +559,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                
                                pValues[j] = p;
                 
-                //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
-                double temp = (lookupFloat.size()-2) / (double) (1- (p*p));
-                temp = sqrt(temp);
-                double sig = p*temp;
-                if (isnan(sig) || isinf(sig)) { sig = 0.0; }
-                
+                double sig = linear.calcSpearmanSig(n, sf[j], sg, di);            
                 out  << '\t' << sig;
                 
                        }
@@ -560,6 +582,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
        try {
                
+        LinearAlgebra linear;
+        
                //format data
                vector< vector<spearmanRank> > scores; scores.resize(numaxes);
                for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
@@ -603,7 +627,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                
-                       if (metadatafile == "") {  out << i+1;  }
+                       if (metadatafile == "") {  out << m->currentSharedBinLabels[i]; }
                        else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
@@ -678,14 +702,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                out << '\t' << p;
                                pValues[j] = p;
                 
-                //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
-                double numer = 3.0 * (numCoor - numDisCoor);
-                int n = scores[j].size();
-                double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
-                denom = sqrt(denom);
-                double sig = numer / denom;
-                
-                if (isnan(sig) || isinf(sig)) { sig = 0.0; }
+                double sig = linear.calcKendallSig(scores[j].size(), p);
                 
                 out << '\t' << sig;
                        }
@@ -817,7 +834,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thisloo
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                newBinLabels.push_back(binLabel);
                        }
@@ -826,7 +843,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thisloo
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
                
                thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
+               m->currentSharedBinLabels = newBinLabels;
                
                return 0;
                
@@ -906,16 +923,11 @@ int CorrAxesCommand::getMetadata(){
                m->openInputFile(metadatafile, in);
                
                string headerLine = m->getline(in); m->gobble(in);
-               istringstream iss (headerLine,istringstream::in);
-               
-               //read the first label, because it refers to the groups
-               string columnLabel;
-               iss >> columnLabel; m->gobble(iss); 
+               vector<string> pieces = m->splitWhiteSpace(headerLine);
                
                //save names of columns you are reading
-               while (!iss.eof()) {
-                       iss >> columnLabel; m->gobble(iss);
-                       metadataLabels.push_back(columnLabel);
+               for (int i = 1; i < pieces.size(); i++) {
+                       metadataLabels.push_back(pieces[i]);
                }
                int count = metadataLabels.size();