]> git.donarmstrong.com Git - mothur.git/blobdiff - pcacommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / pcacommand.cpp
index 2e8f132eefa8a491f1b5e23a2d9650da22ee1c84..7103adb8a150a3e6f9871989ba94bd45d2b6a542 100644 (file)
 #include "inputdata.h"
 
 //**********************************************************************************************************************
-vector<string> PCACommand::getValidParameters(){       
+// Registers the parameters understood by the pca command (shared, relabund,
+// groups, metric, label, inputdir, outputdir) and returns the name of every
+// entry currently held in the `parameters` member.
+// Each descriptor is appended with push_back and this function does not clear
+// `parameters` first, so every call appends another full set of entries.
+// The meaning of the positional CommandParameter constructor arguments is not
+// visible here -- NOTE(review): confirm against the CommandParameter declaration.
+// Any std::exception is reported through m->errorOut and the process exits with status 1.
+vector<string> PCACommand::setParameters(){    
        try {
-               string Array[] =  {"label", "groups","metric","outputdir","inputdir"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","pca-loadings",false,false,true); parameters.push_back(pshared);        
+               CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","pca-loadings",false,false,true); parameters.push_back(prelabund);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter pmetric("metric", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmetric);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               //collect the names in the order the entries are stored in `parameters`
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "PCACommand", "getValidParameters");
+               m->errorOut(e, "PCACommand", "setParameters");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-PCACommand::PCACommand(){      
+// Builds the help text for the pca command and returns it as a single string.
+// Every sentence is terminated with '\n' so that consecutive sentences do not
+// run together when the text is displayed.
+// Any std::exception is reported through m->errorOut and the process exits with status 1.
+string PCACommand::getHelpString(){    
        try {
-               abort = true; calledHelp = true; 
-               vector<string> tempOutNames;
-               outputTypes["pca"] = tempOutNames;
-               outputTypes["loadings"] = tempOutNames;
+               string helpString = "";
+               helpString += "The pca command parameters are shared, relabund, label, groups and metric.  shared or relabund is required unless you have a valid current file.\n"; 
+               helpString += "The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multiple labels may be separated by dashes.\n";
+               helpString += "The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n";
+               helpString += "The metric parameter allows you to indicate if you would like the pearson correlation coefficient calculated. Default=True\n";
+               helpString += "Example pca(groups=yourGroups).\n";
+               helpString += "Example pca(groups=A-B-C).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "PCACommand", "PCACommand");
+               m->errorOut(e, "PCACommand", "getHelpString");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-vector<string> PCACommand::getRequiredParameters(){    
-       try {
-               vector<string> myArray;
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCACommand", "getRequiredParameters");
-               exit(1);
-       }
+// Returns the output filename pattern for the given output type:
+//   "pca"      -> "[filename],[distance],pca.axes"
+//   "loadings" -> "[filename],[distance],pca.loadings"
+// For any other type an [ERROR] message is printed, m->control_pressed is set
+// to true, and an empty string is returned.
+// Any std::exception is reported through m->errorOut and the process exits with status 1.
+string PCACommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "pca") {  pattern = "[filename],[distance],pca.axes"; } 
+        else if (type == "loadings") {  pattern = "[filename],[distance],pca.loadings"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PCACommand", "getOutputPattern");
+        exit(1);
+    }
 }
+
 //**********************************************************************************************************************
-vector<string> PCACommand::getRequiredFiles(){ 
+// Default constructor: sets abort and calledHelp to true, registers the
+// command parameters via setParameters() (the returned name list is discarded),
+// and creates empty "pca" and "loadings" entries in outputTypes.
+// Any std::exception is reported through m->errorOut and the process exits with status 1.
+PCACommand::PCACommand(){      
        try {
-               string Array[] =  {"shared","relabund","or"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["pca"] = tempOutNames;
+               outputTypes["loadings"] = tempOutNames;
        }
        catch(exception& e) {
-               m->errorOut(e, "PCACommand", "getRequiredFiles");
+               m->errorOut(e, "PCACommand", "PCACommand");
                exit(1);
        }
 }
@@ -64,15 +85,12 @@ PCACommand::PCACommand(string option)  {
        try {
                abort = false; calledHelp = false;   
                
-               globaldata = GlobalData::getInstance();
-               
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       //valid paramters for this command
-                       string Array[] =  {"label","groups","metric","outputdir", "inputdir"};
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser. getParameters();
@@ -84,22 +102,62 @@ PCACommand::PCACommand(string option)  {
                        for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
-                       //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              if (inputDir == "not found"){   inputDir = "";          }
-                       
+       
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["pca"] = tempOutNames;
                        outputTypes["loadings"] = tempOutNames;
                        
-                       //make sure the user has already run the read.otu command
-                       if ((globaldata->getSharedFile() == "") && (globaldata->getRelAbundFile() == "")) {
-                               m->mothurOut("You must read a list and a group, shared or relabund file before you can use the pca command."); m->mothurOutEndLine(); abort = true; 
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("relabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
+                               }
                        }
                        
-                       if (globaldata->getSharedFile() != "")          { mode = "shared"; inputFile = globaldata->getSharedFile();             }
-                       if (globaldata->getRelAbundFile() != "")        { mode = "relabund"; inputFile = globaldata->getRelAbundFile(); }
+                       //check for required parameters
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       else {  mode = "sharedfile"; inputFile = sharedfile; m->setSharedFile(sharedfile); }
+                       
+                       relabundfile = validParameter.validFile(parameters, "relabund", true);
+                       if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
+                       else if (relabundfile == "not found") { relabundfile = ""; }
+                       else {  mode = "relabund"; inputFile = relabundfile; m->setRelAbundFile(relabundfile); }
                        
+                       
+                       if ((sharedfile == "") && (relabundfile == "")) { 
+                               //is there are current file available for any of these?
+                               //give priority to shared, then list, then rabund, then sabund
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile(); 
+                               if (sharedfile != "") { inputFile = sharedfile; mode = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       relabundfile = m->getRelAbundFile(); 
+                                       if (relabundfile != "") { inputFile = relabundfile; mode = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("No valid current files. You must provide a relabund or shared file."); m->mothurOutEndLine(); 
+                                               abort = true;
+                                       }
+                               }
+                       }
+                               
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
@@ -110,13 +168,13 @@ PCACommand::PCACommand(string option)  {
                        metric = m->isTrue(temp); 
                        
                        label = validParameter.validFile(parameters, "label", false);                   
-                       if (label == "not found") { label = ""; labels = globaldata->labels; if(labels.size() == 0) {  m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); } }
+                       if (label == "not found") { label = ""; if(labels.size() == 0) {  m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); } }
                        else { m->splitAtDash(label, labels); }
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = "";  }
                        else { m->splitAtDash(groups, Groups);  }                       
-                       globaldata->Groups = Groups;                    
+                       m->setGroups(Groups);                   
                        
                }
                
@@ -127,25 +185,6 @@ PCACommand::PCACommand(string option)  {
        }
 }
 //**********************************************************************************************************************
-void PCACommand::help(){
-       try {
-               m->mothurOut("The pca command can only be run after a successful read.otu command of a shared or relabund file."); m->mothurOutEndLine();
-               m->mothurOut("The pca command parameters are label, groups and metric. No parameters are required."); m->mothurOutEndLine();
-               m->mothurOut("The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multiple labels may be separated by dashes.\n");
-               m->mothurOut("The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n");
-               m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
-               m->mothurOut("Example pca(groups=yourGroups).\n");
-               m->mothurOut("Example pca(groups=A-B-C).\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCACommand", "help");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-PCACommand::~PCACommand(){}
-//**********************************************************************************************************************
 int PCACommand::execute(){
        try {
                
@@ -159,7 +198,7 @@ int PCACommand::execute(){
                //get first line of shared file
                vector< vector<double> > matrix;
                InputData* input;
-               if (mode == "shared")                   {  
+               if (mode == "sharedfile")                       {  
                        input = new InputData(inputFile, "sharedfile");
                }else if (mode == "relabund")   { 
                        input = new InputData(inputFile, "relabund");
@@ -181,7 +220,7 @@ int PCACommand::execute(){
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
                        
-                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear(); return 0;  }
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear(); return 0;  }
                        
                        if(labels.count(lookupFloat[0]->getLabel()) == 1){
                                processedLabels.insert(lookupFloat[0]->getLabel());
@@ -214,7 +253,7 @@ int PCACommand::execute(){
                }
                
                
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear(); return 0;  }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear(); return 0;  }
                
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -242,7 +281,7 @@ int PCACommand::execute(){
                for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
                delete input;
                
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -256,7 +295,8 @@ int PCACommand::execute(){
                exit(1);
        }
 }
-//**********************************************************************************************************************
+
+/**********************************************************************************************************************
 vector< vector<double> > PCACommand::createMatrix(vector<SharedRAbundFloatVector*> lookupFloat){
        try {
                vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
@@ -283,62 +323,82 @@ vector< vector<double> > PCACommand::createMatrix(vector<SharedRAbundFloatVector
                m->errorOut(e, "PCACommand", "createMatrix");   
                exit(1);
        }
-}
+}*/
 //**********************************************************************************************************************
+
 int PCACommand::process(vector<SharedRAbundFloatVector*>& lookupFloat){
        try {
                m->mothurOut("\nProcessing " + lookupFloat[0]->getLabel()); m->mothurOutEndLine();
+       
+               int numOTUs = lookupFloat[0]->getNumBins();
+               int numSamples = lookupFloat.size();
                
-               vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
+               vector< vector<double> > matrix(numSamples);
+               vector<double> colMeans(numOTUs);
                
-               //fill matrix with shared files relative abundances
+               //fill matrix with shared relative abundances, re-center
                for (int i = 0; i < lookupFloat.size(); i++) {
-                       for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) {
-                               matrix[i].push_back(lookupFloat[i]->getAbundance(j));
+                       matrix[i].resize(numOTUs, 0);
+                       
+                       for (int j = 0; j < numOTUs; j++) {
+                               matrix[i][j] = lookupFloat[i]->getAbundance(j);
+                               colMeans[j] += matrix[i][j];
                        }
                }
                
-               vector< vector<double> > transposeMatrix; transposeMatrix.resize(matrix[0].size());
-               for (int i = 0; i < transposeMatrix.size(); i++) {
-                       for (int j = 0; j < matrix.size(); j++) {
-                               transposeMatrix[i].push_back(matrix[j][i]);
+
+               for(int j=0;j<numOTUs;j++){
+                       colMeans[j] = colMeans[j] / (double)numSamples;
+               }
+               
+               vector<vector<double> > centered = matrix;
+               for(int i=0;i<numSamples;i++){
+                       for(int j=0;j<numOTUs;j++){
+                               centered[i][j] = centered[i][j] - colMeans[j];                          
                        }
                }
+
                
-               matrix = linearCalc.matrix_mult(matrix, transposeMatrix);               
+               vector< vector<double> > transpose(numOTUs);
+               for (int i = 0; i < numOTUs; i++) {
+                       transpose[i].resize(numSamples, 0);
+                       
+                       for (int j = 0; j < numSamples; j++) {
+                               transpose[i][j] = centered[j][i];
+                       }
+               }
+
+               vector<vector<double> > crossProduct = linearCalc.matrix_mult(transpose, centered);     
                
-               double offset = 0.0000;
                vector<double> d;
                vector<double> e;
-               vector<vector<double> > G = matrix;
-               vector<vector<double> > copy_G;
-                       
-               for(int count=0;count<2;count++){
-                       linearCalc.tred2(G, d, e);                              if (m->control_pressed) { return 0; }
-                       linearCalc.qtli(d, e, G);                               if (m->control_pressed) { return 0; }
-                       offset = d[d.size()-1];
-                       if(offset > 0.0) break;
-               } 
+
+               linearCalc.tred2(crossProduct, d, e);           if (m->control_pressed) { return 0; }
+               linearCalc.qtli(d, e, crossProduct);            if (m->control_pressed) { return 0; }
+               
+               vector<vector<double> > X = linearCalc.matrix_mult(centered, crossProduct);
                
                if (m->control_pressed) { return 0; }
                
                string fbase = outputDir + m->getRootName(m->getSimpleName(inputFile));
-               string outputFileName = fbase + lookupFloat[0]->getLabel();
-               output(outputFileName, globaldata->Groups, G, d);
+               //string outputFileName = fbase + lookupFloat[0]->getLabel();
+               output(fbase, lookupFloat[0]->getLabel(), m->getGroups(), X, d);
                
                if (metric) {   
                        
+                       vector<vector<double> > observedEuclideanDistance = linearCalc.getObservedEuclideanDistance(centered);
+                       
                        for (int i = 1; i < 4; i++) {
                                
-                               vector< vector<double> > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file
-                               
-                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
+                               vector< vector<double> > PCAEuclidDists = linearCalc.calculateEuclidianDistance(X, i); //G is the pca file
                                
-                               double corr = linearCalc.calcPearson(EuclidDists, matrix); //G is the pcoa file, D is the users distance matrix
-                               
-                               m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
+                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
+
+                               double corr = linearCalc.calcPearson(PCAEuclidDists, observedEuclideanDistance);
+                                                               
+                               m->mothurOut("Rsq " + toString(i) + " axis: " + toString(corr * corr)); m->mothurOutEndLine();
                                
-                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
+                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
                        }
                }
                
@@ -351,44 +411,48 @@ int PCACommand::process(vector<SharedRAbundFloatVector*>& lookupFloat){
 }
 /*********************************************************************************************************************************/
 
-void PCACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
+void PCACommand::output(string fbase, string label, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
        try {
-               int rank = name_list.size();
+
+               int numEigenValues = d.size();
                double dsum = 0.0000;
-               for(int i=0;i<rank;i++){
+               for(int i=0;i<numEigenValues;i++){
                        dsum += d[i];
-                       for(int j=0;j<rank;j++){
-                               if(d[j] >= 0)   {       G[i][j] *= pow(d[j],0.5);       }
-                               else                    {       G[i][j] = 0.00000;                      }
-                       }
                }
                
-               ofstream pcaData((fnameRoot+".pca.axes").c_str(), ios::trunc);
+               ofstream pcaData;
+        map<string, string> variables; 
+        variables["[filename]"] = fbase;
+        variables["[distance]"] = label;
+        string pcaFileName = getOutputFileName("pca",variables);
+        m->openOutputFile(pcaFileName, pcaData);
                pcaData.setf(ios::fixed, ios::floatfield);
                pcaData.setf(ios::showpoint);   
-               outputNames.push_back(fnameRoot+".pca.axes");
-               outputTypes["pca"].push_back(fnameRoot+".pca.axes");
+               outputNames.push_back(pcaFileName);
+               outputTypes["pca"].push_back(pcaFileName);
                
-               ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc);
+               ofstream pcaLoadings;
+        string loadingsFilename = getOutputFileName("loadings",variables);
+         m->openOutputFile(loadingsFilename, pcaLoadings);
                pcaLoadings.setf(ios::fixed, ios::floatfield);
                pcaLoadings.setf(ios::showpoint);
-               outputNames.push_back(fnameRoot+".pca.loadings");
-               outputTypes["loadings"].push_back(fnameRoot+".pca.loadings");   
+               outputNames.push_back(loadingsFilename);
+               outputTypes["loadings"].push_back(loadingsFilename);    
                
                pcaLoadings << "axis\tloading\n";
-               for(int i=0;i<rank;i++){
+               for(int i=0;i<numEigenValues;i++){
                        pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
                }
                
                pcaData << "group";
-               for(int i=0;i<rank;i++){
+               for(int i=0;i<numEigenValues;i++){
                        pcaData << '\t' << "axis" << i+1;
                }
                pcaData << endl;
                
-               for(int i=0;i<rank;i++){
+               for(int i=0;i<name_list.size();i++){
                        pcaData << name_list[i] << '\t';
-                       for(int j=0;j<rank;j++){
+                       for(int j=0;j<numEigenValues;j++){
                                pcaData << G[i][j] << '\t';
                        }
                        pcaData << endl;