]> git.donarmstrong.com Git - mothur.git/commitdiff
pca command
authorwestcott <westcott>
Mon, 10 Jan 2011 17:39:51 +0000 (17:39 +0000)
committerwestcott <westcott>
Mon, 10 Jan 2011 17:39:51 +0000 (17:39 +0000)
20 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
corraxescommand.cpp
corraxescommand.h
datavector.hpp
getrelabundcommand.cpp
getrelabundcommand.h
inputdata.cpp
linearalgebra.cpp
linearalgebra.h
metastatscommand.cpp
metastatscommand.h
pcacommand.cpp [new file with mode: 0644]
pcacommand.h [new file with mode: 0644]
pcoacommand.cpp
pcoacommand.h
sharedrabundfloatvector.cpp
sharedrabundfloatvector.h
sharedrabundvector.cpp
sharedrabundvector.h

index 20b0e4898f2e58c2fc781931725fffab60cf569e..c59979fe15bb1ce671c189216fdd3d9776d10280 100644 (file)
                A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; };
                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; };
                A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC480D12D788F20055BC5C /* linearalgebra.cpp */; };
+               A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC486612D795D60055BC5C /* pcacommand.cpp */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXCopyFilesBuildPhase section */
                A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
                A7FC480C12D788F20055BC5C /* linearalgebra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = linearalgebra.h; sourceTree = "<group>"; };
                A7FC480D12D788F20055BC5C /* linearalgebra.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = linearalgebra.cpp; sourceTree = "<group>"; };
+               A7FC486512D795D60055BC5C /* pcacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcacommand.h; sourceTree = "<group>"; };
+               A7FC486612D795D60055BC5C /* pcacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pcacommand.cpp; sourceTree = "<group>"; };
                C6A0FF2C0290799A04C91782 /* mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mothur.1; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
                                A7E9B78212D37EC400DA6239 /* parselistscommand.h */,
                                A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */,
                                A7E9B78612D37EC400DA6239 /* parsimonycommand.h */,
+                               A7FC486512D795D60055BC5C /* pcacommand.h */,
+                               A7FC486612D795D60055BC5C /* pcacommand.cpp */,
                                A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */,
                                A7E9B78812D37EC400DA6239 /* pcoacommand.h */,
                                A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */,
                                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */,
                                A70332B712D3A13400761E33 /* makefile in Sources */,
                                A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */,
+                               A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index e3eb2b230ebbd11d83311feb3b63336eb928e698..f581b6db86bb7e8727c76ba28abc57dfe98c06f5 100644 (file)
 #include "trimflowscommand.h"
 #include "corraxescommand.h"
 #include "shhhercommand.h"
+#include "pcacommand.h"
 
 /*******************************************************/
 
@@ -213,6 +214,7 @@ CommandFactory::CommandFactory(){
        commands["indicator"]                   = "indicator";
        commands["consensus.seqs"]              = "consensus.seqs";
        commands["corr.axes"]                   = "corr.axes";
+       commands["pca"]                                 = "pca";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -333,7 +335,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "phylotype")                             {       command = new PhylotypeCommand(optionString);                           }
                else if(commandName == "mgcluster")                             {       command = new MGClusterCommand(optionString);                           }
                else if(commandName == "pre.cluster")                   {       command = new PreClusterCommand(optionString);                          }
-               else if(commandName == "pcoa")                                  {       command = new PCOACommand(optionString);                                                }
+               else if(commandName == "pcoa")                                  {       command = new PCOACommand(optionString);                                        }
+               else if(commandName == "pca")                                   {       command = new PCACommand(optionString);                                         }
                else if(commandName == "otu.hierarchy")                 {       command = new OtuHierarchyCommand(optionString);                        }
                else if(commandName == "set.dir")                               {       command = new SetDirectoryCommand(optionString);                        }
                else if(commandName == "set.logfile")                   {       command = new SetLogFileCommand(optionString);                          }
@@ -457,7 +460,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "phylotype")                             {       pipecommand = new PhylotypeCommand(optionString);                               }
                else if(commandName == "mgcluster")                             {       pipecommand = new MGClusterCommand(optionString);                               }
                else if(commandName == "pre.cluster")                   {       pipecommand = new PreClusterCommand(optionString);                              }
-               else if(commandName == "pcoa")                                  {       pipecommand = new PCOACommand(optionString);                                            }
+               else if(commandName == "pcoa")                                  {       pipecommand = new PCOACommand(optionString);                                    }
+               else if(commandName == "pca")                                   {       pipecommand = new PCACommand(optionString);                                             }
                else if(commandName == "otu.hierarchy")                 {       pipecommand = new OtuHierarchyCommand(optionString);                    }
                else if(commandName == "set.dir")                               {       pipecommand = new SetDirectoryCommand(optionString);                    }
                else if(commandName == "set.logfile")                   {       pipecommand = new SetLogFileCommand(optionString);                              }
@@ -569,6 +573,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "mgcluster")                             {       shellcommand = new MGClusterCommand();                          }
                else if(commandName == "pre.cluster")                   {       shellcommand = new PreClusterCommand();                         }
                else if(commandName == "pcoa")                                  {       shellcommand = new PCOACommand();                                       }
+               else if(commandName == "pca")                                   {       shellcommand = new PCACommand();                                        }
                else if(commandName == "otu.hierarchy")                 {       shellcommand = new OtuHierarchyCommand();                       }
                else if(commandName == "set.dir")                               {       shellcommand = new SetDirectoryCommand();                       }
                else if(commandName == "set.logfile")                   {       shellcommand = new SetLogFileCommand();                         }
index 9ffc4b6ee1ce5cec5440264ae2b0f35f04e3aa49..0d421b5aba7d6be8e8f9bf13ae00d39eecbac926 100644 (file)
@@ -222,22 +222,21 @@ int CorrAxesCommand::execute(){
                // use smart distancing to get right sharedRabund and convert to relabund if needed  //
                /************************************************************************************/
                if (sharedfile != "") {  
-                       getShared(); 
-                       if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
-                       if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
+                       InputData* input = new InputData(sharedfile, "sharedfile");
+                       getSharedFloat(input); 
+                       delete input;
                        
-                       //fills lookupFloat with relative abundance values from lookup
-                       convertToRelabund();
-                       
-                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                       if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
+                       if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
                        
                }else if (relabundfile != "") { 
-                       getSharedFloat(); 
+                       InputData* input = new InputData(relabundfile, "relabund");
+                       getSharedFloat(input); 
+                       delete input;
+                       
                        if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
                        if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
                        
-                       if (pickedGroups) { eliminateZeroOTUS(lookupFloat); }
-                       
                }else if (metadatafile != "") { 
                        getMetadata();  //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later
                        if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
@@ -612,89 +611,12 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
        }
 }
 //**********************************************************************************************************************
-int CorrAxesCommand::getShared(){
+int CorrAxesCommand::getSharedFloat(InputData* input){
        try {
-               InputData* input = new InputData(sharedfile, "sharedfile");
-               lookup = input->getSharedRAbundVectors();
-               string lastLabel = lookup[0]->getLabel();
-               
-               if (label == "") { label = lastLabel; delete input; return 0; }
-               
-               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
-               set<string> labels; labels.insert(label);
-               set<string> processedLabels;
-               set<string> userLabels = labels;
-               
-               //as long as you are not at the end of the file or done wih the lines you want
-               while((lookup[0] != NULL) && (userLabels.size() != 0)) {
-                       if (m->control_pressed) {  delete input; return 0;  }
-                       
-                       if(labels.count(lookup[0]->getLabel()) == 1){
-                               processedLabels.insert(lookup[0]->getLabel());
-                               userLabels.erase(lookup[0]->getLabel());
-                               break;
-                       }
-                       
-                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-                               string saveLabel = lookup[0]->getLabel();
-                               
-                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
-                               lookup = input->getSharedRAbundVectors(lastLabel);
-                               
-                               processedLabels.insert(lookup[0]->getLabel());
-                               userLabels.erase(lookup[0]->getLabel());
-                               
-                               //restore real lastlabel to save below
-                               lookup[0]->setLabel(saveLabel);
-                               break;
-                       }
-                       
-                       lastLabel = lookup[0]->getLabel();                      
-                       
-                       //get next line to process
-                       //prevent memory leak
-                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
-                       lookup = input->getSharedRAbundVectors();
-               }
-               
-               
-               if (m->control_pressed) { delete input; return 0;  }
-               
-               //output error messages about any remaining user labels
-               set<string>::iterator it;
-               bool needToRun = false;
-               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       m->mothurOut("Your file does not include the label " + *it); 
-                       if (processedLabels.count(lastLabel) != 1) {
-                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
-                               needToRun = true;
-                       }else {
-                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
-                       }
-               }
-               
-               //run last label if you need to
-               if (needToRun == true)  {
-                       for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
-                       lookup = input->getSharedRAbundVectors(lastLabel);
-               }       
-               
-               delete input;
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "CorrAxesCommand", "getShared"); 
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int CorrAxesCommand::getSharedFloat(){
-       try {
-               InputData* input = new InputData(relabundfile, "relabund");
                lookupFloat = input->getSharedRAbundFloatVectors();
                string lastLabel = lookupFloat[0]->getLabel();
                
-               if (label == "") { label = lastLabel; delete input; return 0; }
+               if (label == "") { label = lastLabel;  return 0; }
                
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> labels; labels.insert(label);
@@ -704,7 +626,7 @@ int CorrAxesCommand::getSharedFloat(){
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
                        
-                       if (m->control_pressed) {  delete input; return 0;  }
+                       if (m->control_pressed) {  return 0;  }
                        
                        if(labels.count(lookupFloat[0]->getLabel()) == 1){
                                processedLabels.insert(lookupFloat[0]->getLabel());
@@ -735,7 +657,7 @@ int CorrAxesCommand::getSharedFloat(){
                }
                
                
-               if (m->control_pressed) { delete input; return 0;  }
+               if (m->control_pressed) { return 0;  }
                
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -756,7 +678,6 @@ int CorrAxesCommand::getSharedFloat(){
                        lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
                }       
                
-               delete input;
                return 0;
        }
        catch(exception& e) {
@@ -764,43 +685,6 @@ int CorrAxesCommand::getSharedFloat(){
                exit(1);
        }
 }
-/*****************************************************************/
-int CorrAxesCommand::convertToRelabund(){
-       try {
-               
-               vector<SharedRAbundFloatVector*> newLookup;
-               for (int i = 0; i < lookup.size(); i++) {
-                       SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
-                       temp->setLabel(lookup[i]->getLabel());
-                       temp->setGroup(lookup[i]->getGroup());
-                       newLookup.push_back(temp);
-               }
-               
-               for (int i = 0; i < lookup.size(); i++) {
-                       
-                       for (int j = 0; j < lookup[i]->getNumBins(); j++) {
-                               
-                               if (m->control_pressed) { return 0; }
-                               
-                               int abund = lookup[i]->getAbundance(j);
-                               
-                               float relabund = abund / (float) lookup[i]->getNumSeqs();
-                               
-                               newLookup[i]->push_back(relabund, lookup[i]->getGroup());
-                       }
-               }
-               
-               if (pickedGroups) { eliminateZeroOTUS(newLookup); }
-               
-               lookupFloat = newLookup;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "CorrAxesCommand", "convertToRelabund"); 
-               exit(1);
-       }
-}
 //**********************************************************************************************************************
 int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
        try {
index e02c52a002b338f4f8d255b2daccbc571d16651b..8547b0fa1978a6d5477b9d393d0846455a3ad00a 100644 (file)
@@ -11,7 +11,6 @@
  */
 
 #include "command.hpp"
-#include "sharedrabundvector.h"
 #include "sharedrabundfloatvector.h"
 #include "inputdata.h"
 
@@ -47,14 +46,11 @@ private:
        
        vector<string> outputNames, Groups;
        map<string, vector<string> > outputTypes;
-       vector<SharedRAbundVector*> lookup;
        vector<SharedRAbundFloatVector*> lookupFloat;
        vector<string> metadataLabels;
        
-       int getShared();
-       int getSharedFloat();
+       int getSharedFloat(InputData*);
        int getMetadata();
-       int convertToRelabund();
        int eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&);
        map<string, vector<float> > readAxes();
        int calcPearson(map<string, vector<float> >&, ofstream&);
index a5f96eb7f497691d3f2e0a58e976b158cc27d831..3ecd407bb35af63aa4a88f31722c469c219bc0a2 100644 (file)
@@ -16,6 +16,7 @@ class SharedListVector;
 class SharedOrderVector;
 class SharedSAbundVector;
 class SharedRAbundVector;
+class SharedRAbundFloatVector;
 
 class DataVector {
        
index 8c28c47b220e1554972d3cdb97bd2218af4f1c77..08e9228b215819f5e19e4d697137384294392422 100644 (file)
@@ -275,8 +275,6 @@ int GetRelAbundCommand::execute(){
 
 int GetRelAbundCommand::getRelAbundance(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
        try {
-               if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
-
                
                 for (int i = 0; i < thisLookUp.size(); i++) {
                        out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
@@ -320,48 +318,5 @@ int GetRelAbundCommand::getRelAbundance(vector<SharedRAbundVector*>& thisLookUp,
        }
 }
 //**********************************************************************************************************************
-int GetRelAbundCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
-       try {
-               
-               vector<SharedRAbundVector*> newLookup;
-               for (int i = 0; i < thislookup.size(); i++) {
-                       SharedRAbundVector* temp = new SharedRAbundVector();
-                       temp->setLabel(thislookup[i]->getLabel());
-                       temp->setGroup(thislookup[i]->getGroup());
-                       newLookup.push_back(temp);
-               }
-               
-               //for each bin
-               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
-                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-               
-                       //look at each sharedRabund and make sure they are not all zero
-                       bool allZero = true;
-                       for (int j = 0; j < thislookup.size(); j++) {
-                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
-                       }
-                       
-                       //if they are not all zero add this bin
-                       if (!allZero) {
-                               for (int j = 0; j < thislookup.size(); j++) {
-                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
-                               }
-                       }
-               }
-
-               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-
-               thislookup = newLookup;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "GetRelAbundCommand", "eliminateZeroOTUS");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
 
 
index 3a5f6ad094a16bf2e43c4521c2f1d35436352e60..90d5f9a2ceb9c3164640d8482e88599324557495 100644 (file)
@@ -43,7 +43,6 @@ private:
        map<string, vector<string> > outputTypes;
        
        int getRelAbundance(vector<SharedRAbundVector*>&, ofstream&);
-       int eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup);
 
 };
 
index 5d1542739b0b0b6d6e730a5e84654468e3ca819f..283fb7edc33856613612e10b0d293d0eea1f056d 100644 (file)
@@ -519,6 +519,15 @@ vector<SharedRAbundFloatVector*> InputData::getSharedRAbundFloatVectors(){
                                if (SharedRelAbund != NULL) {
                                        return SharedRelAbund->getSharedRAbundFloatVectors();
                                }
+                       }else if (format == "sharedfile")  {
+                               SharedRAbundVector* SharedRAbund = new SharedRAbundVector(fileHandle);
+                               if (SharedRAbund != NULL) {
+                                       vector<SharedRAbundVector*> lookup = SharedRAbund->getSharedRAbundVectors(); 
+                                       vector<SharedRAbundFloatVector*> lookupFloat = SharedRAbund->getSharedRAbundFloatVectors(lookup); 
+                                       for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup.clear();
+                                       return lookupFloat;  
+                               }
+                                               
                        }
                        m->gobble(fileHandle);
                }
@@ -560,8 +569,32 @@ vector<SharedRAbundFloatVector*> InputData::getSharedRAbundFloatVectors(string l
                                        }else{  break;  }
                                        m->gobble(in);
                                }
-                       }
+                       }else if (format == "sharedfile")  {
+                               while (in.eof() != true) {
+                                       
+                                       SharedRAbundVector* SharedRAbund = new SharedRAbundVector(in);
+                                       if (SharedRAbund != NULL) {
+                                               thisLabel = SharedRAbund->getLabel();
+                                               
+                                               //if you are at the last label
+                                               if (thisLabel == label) {  
+                                                       in.close(); 
+                                                       vector<SharedRAbundVector*> lookup = SharedRAbund->getSharedRAbundVectors(); 
+                                                       vector<SharedRAbundFloatVector*> lookupFloat = SharedRAbund->getSharedRAbundFloatVectors(lookup); 
+                                                       for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup.clear();
+                                                       return lookupFloat;  
+                                               }else {
+                                                       //so you don't loose this memory
+                                                       vector<SharedRAbundVector*> lookup = SharedRAbund->getSharedRAbundVectors(); 
+                                                       for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup.clear();
+                                                       delete SharedRAbund;
+                                               }
+                                       }else{  break;  }
+                                       m->gobble(in);
+                               }
+                       }       
                }
+               
                                
                //this is created to signal to calling function that the input file is at eof
                vector<SharedRAbundFloatVector*> null;  null.push_back(NULL);
index e015b5fa1472d9e587a8f24fe008f6af4f049c84..5de5a71dc35c1ab8888ff28ab8d7e92107f8cc72 100644 (file)
@@ -27,7 +27,7 @@ vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first
                
                product.resize(first_rows);
                for(int i=0;i<first_rows;i++){
-                       product[i].resize(second_cols);
+                       product[i].resize(first_cols);
                }
                
                for(int i=0;i<first_rows;i++){
@@ -234,5 +234,113 @@ int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<doub
        }
 }
 /*********************************************************************************************************************************/
+vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
+       try {
+               //make square matrix
+               vector< vector<double> > dists; dists.resize(axes.size());
+               for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
+               
+               if (dimensions == 1) { //one dimension calc = abs(x-y)
+                       
+                       for (int i = 0; i < dists.size(); i++) {
+                               
+                               if (m->control_pressed) { return dists; }
+                               
+                               for (int j = 0; j < i; j++) {
+                                       dists[i][j] = abs(axes[i][0] - axes[j][0]);
+                                       dists[j][i] = dists[i][j];
+                               }
+                       }
+                       
+               }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
+                       
+                       for (int i = 0; i < dists.size(); i++) {
+                               
+                               if (m->control_pressed) { return dists; }
+                               
+                               for (int j = 0; j < i; j++) {
+                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
+                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
+                                       
+                                       dists[i][j] = sqrt((firstDim + secondDim));
+                                       dists[j][i] = dists[i][j];
+                               }
+                       }
+                       
+               }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
+                       
+                       for (int i = 0; i < dists.size(); i++) {
+                               
+                               if (m->control_pressed) { return dists; }
+                               
+                               for (int j = 0; j < i; j++) {
+                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
+                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
+                                       double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
+                                       
+                                       dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
+                                       dists[j][i] = dists[i][j];
+                               }
+                       }
+                       
+               }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+               
+               return dists;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
+       try {
+               
+               //find average for - X
+               vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
+               for (int i = 0; i < euclidDists.size(); i++) {
+                       for (int j = 0; j < euclidDists[i].size(); j++) {
+                               averageEuclid[i] += euclidDists[i][j];  
+                       }
+               }
+               for (int i = 0; i < averageEuclid.size(); i++) {  averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size();   }
+               
+               //find average for - Y
+               vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
+               for (int i = 0; i < userDists.size(); i++) {
+                       for (int j = 0; j < userDists[i].size(); j++) {
+                               averageUser[i] += userDists[i][j];  
+                       }
+               }
+               for (int i = 0; i < averageUser.size(); i++) {  averageUser[i] = averageUser[i] / (float) userDists.size();  }
+               
+               double numerator = 0.0;
+               double denomTerm1 = 0.0;
+               double denomTerm2 = 0.0;
+               
+               for (int i = 0; i < euclidDists.size(); i++) {
+                       
+                       for (int k = 0; k < i; k++) {
+                               
+                               float Yi = userDists[i][k];
+                               float Xi = euclidDists[i][k];
+                               
+                               numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
+                               denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
+                               denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
+                       }
+               }
+               
+               double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
+               double r = numerator / denom;
+               
+               return r;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
 
 
index 2acf5cb5584a59d9b6ef87517ce3ad53a839bf56..286649374e1bd5a4768c257f564e86fb8ca14567 100644 (file)
@@ -21,7 +21,8 @@ public:
        vector<vector<double> > matrix_mult(vector<vector<double> >, vector<vector<double> >);
        int tred2(vector<vector<double> >&, vector<double>&, vector<double>&);
        int qtli(vector<double>&, vector<double>&, vector<vector<double> >&);
-       
+       vector< vector<double> > calculateEuclidianDistance(vector<vector<double> >&, int);
+       double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
        
 private:
        MothurOut* m;
index a734ce3a6c9e01a89f6d99e1ff2bd1236b1c944a..81715e0abe84b485b96eb9997120dbe127dd6a5e 100644 (file)
@@ -347,7 +347,6 @@ int MetaStatsCommand::execute(){
 
 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
        try {
-               if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
                
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
@@ -466,45 +465,3 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
        }
 }
 //**********************************************************************************************************************
-int MetaStatsCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
-       try {
-               
-               vector<SharedRAbundVector*> newLookup;
-               for (int i = 0; i < thislookup.size(); i++) {
-                       SharedRAbundVector* temp = new SharedRAbundVector();
-                       temp->setLabel(thislookup[i]->getLabel());
-                       temp->setGroup(thislookup[i]->getGroup());
-                       newLookup.push_back(temp);
-               }
-               
-               //for each bin
-               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
-                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-               
-                       //look at each sharedRabund and make sure they are not all zero
-                       bool allZero = true;
-                       for (int j = 0; j < thislookup.size(); j++) {
-                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
-                       }
-                       
-                       //if they are not all zero add this bin
-                       if (!allZero) {
-                               for (int j = 0; j < thislookup.size(); j++) {
-                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
-                               }
-                       }
-               }
-
-               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-
-               thislookup = newLookup;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
index a38f3a7951c9629b680b4799c4a9ea00ea8d0270..8c633b5417cbe07dc96827cf80c8e0b5522c0fad 100644 (file)
@@ -54,7 +54,6 @@ private:
        float threshold;
        
        int process(vector<SharedRAbundVector*>&);
-       int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
        int driver(int, int, vector<SharedRAbundVector*>&);
 };
 
diff --git a/pcacommand.cpp b/pcacommand.cpp
new file mode 100644 (file)
index 0000000..0842c53
--- /dev/null
@@ -0,0 +1,405 @@
+/*
+ *  pcacommand.cpp
+ *  mothur
+ *
+ *  Created by westcott on 1/7/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "pcacommand.h"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> PCACommand::getValidParameters(){       
+       try {
+               string Array[] =  {"label", "groups","metric","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+PCACommand::PCACommand(){      
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["pca"] = tempOutNames;
+               outputTypes["loadings"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "PCACommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> PCACommand::getRequiredParameters(){    
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> PCACommand::getRequiredFiles(){ 
+       try {
+               string Array[] =  {"shared","relabund","or"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+PCACommand::PCACommand(string option)  {
+       try {
+               abort = false;
+               
+               globaldata = GlobalData::getInstance();
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"label","groups","metric","outputdir", "inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser. getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              if (inputDir == "not found"){   inputDir = "";          }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["pca"] = tempOutNames;
+                       outputTypes["loadings"] = tempOutNames;
+                       
+                       //make sure the user has already run the read.otu command
+                       if ((globaldata->getSharedFile() == "") && (globaldata->getRelAbundFile() == "")) {
+                               m->mothurOut("You must read a list and a group, shared or relabund file before you can use the pca command."); m->mothurOutEndLine(); abort = true; 
+                       }
+                       
+                       if (globaldata->getSharedFile() != "")          { mode = "shared"; inputFile = globaldata->getSharedFile();             }
+                       if (globaldata->getRelAbundFile() != "")        { mode = "relabund"; inputFile = globaldata->getRelAbundFile(); }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += m->hasPath(inputFile); //if user entered a file with a path then preserve it       
+                       }
+                                               
+                       string temp = validParameter.validFile(parameters, "metric", false);    if (temp == "not found"){       temp = "T";                             }
+                       metric = m->isTrue(temp); 
+                       
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine();  }   
+                       else { m->splitAtDash(label, labels); }
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = "";  }
+                       else { m->splitAtDash(groups, Groups);  }                       
+                       globaldata->Groups = Groups;                    
+                       
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "PCACommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+void PCACommand::help(){
+       try {
+               m->mothurOut("The pca command can only be run after a successful read.otu command."); m->mothurOutEndLine();
+               m->mothurOut("The pca command parameters are label, groups and metric. No parameters are required."); m->mothurOutEndLine();
+               m->mothurOut("The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multpile labels may be separated by dashes.\n");
+               m->mothurOut("The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n");
+               m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
+               m->mothurOut("Example pca(groups=yourGroups).\n");
+               m->mothurOut("Example pca(groups=A-B-C).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "help");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+PCACommand::~PCACommand(){}
+//**********************************************************************************************************************
+int PCACommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               cout.setf(ios::fixed, ios::floatfield);
+               cout.setf(ios::showpoint);
+               cerr.setf(ios::fixed, ios::floatfield);
+               cerr.setf(ios::showpoint);
+               
+               //get first line of shared file
+               vector< vector<double> > matrix;
+               InputData* input;
+               if (mode == "shared")                   {  
+                       input = new InputData(inputFile, "sharedfile");
+               }else if (mode == "relabund")   { 
+                       input = new InputData(inputFile, "relabund");
+               }else {  m->mothurOut("[ERROR]: filetype not recognized."); m->mothurOutEndLine();  return 0; }
+               
+               vector<SharedRAbundFloatVector*> lookupFloat = input->getSharedRAbundFloatVectors();
+               string lastLabel = lookupFloat[0]->getLabel();
+                       
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //if the user gave no labels, then use the first one read
+               if (labels.size() == 0) { 
+                       label = lastLabel;  
+                       
+                       process(lookupFloat);
+               }
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
+                       
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear(); return 0;  }
+                       
+                       if(labels.count(lookupFloat[0]->getLabel()) == 1){
+                               processedLabels.insert(lookupFloat[0]->getLabel());
+                               userLabels.erase(lookupFloat[0]->getLabel());
+                               
+                               process(lookupFloat);
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookupFloat[0]->getLabel();
+                               
+                               for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear();
+                               lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
+                               
+                               process(lookupFloat);
+                               
+                               processedLabels.insert(lookupFloat[0]->getLabel());
+                               userLabels.erase(lookupFloat[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookupFloat[0]->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = lookupFloat[0]->getLabel();                 
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear();
+                       lookupFloat = input->getSharedRAbundFloatVectors();
+               }
+               
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear(); return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } }  lookupFloat.clear();
+                       lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
+                       
+                       process(lookupFloat);
+                       
+                       for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
+               }       
+               
+               for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
+               delete input;
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector< vector<double> > PCACommand::createMatrix(vector<SharedRAbundFloatVector*> lookupFloat){
+       try {
+               vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
+               
+               //fill matrix with shared files relative abundances
+               for (int i = 0; i < lookupFloat.size(); i++) {
+                       for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) {
+                               matrix[i].push_back(lookupFloat[i]->getAbundance(j));
+                       }
+               }
+               
+               vector< vector<double> > transposeMatrix; transposeMatrix.resize(matrix[0].size());
+               for (int i = 0; i < transposeMatrix.size(); i++) {
+                       for (int j = 0; j < matrix.size(); j++) {
+                               transposeMatrix[i].push_back(matrix[j][i]);
+                       }
+               }
+               
+               matrix = linearCalc.matrix_mult(matrix, transposeMatrix);
+               
+               return matrix;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "createMatrix");   
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int PCACommand::process(vector<SharedRAbundFloatVector*>& lookupFloat){
+       try {
+               m->mothurOut("\nProcessing " + lookupFloat[0]->getLabel()); m->mothurOutEndLine();
+               
+               vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
+               
+               //fill matrix with shared files relative abundances
+               for (int i = 0; i < lookupFloat.size(); i++) {
+                       for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) {
+                               matrix[i].push_back(lookupFloat[i]->getAbundance(j));
+                       }
+               }
+               
+               vector< vector<double> > transposeMatrix; transposeMatrix.resize(matrix[0].size());
+               for (int i = 0; i < transposeMatrix.size(); i++) {
+                       for (int j = 0; j < matrix.size(); j++) {
+                               transposeMatrix[i].push_back(matrix[j][i]);
+                       }
+               }
+               
+               matrix = linearCalc.matrix_mult(matrix, transposeMatrix);               
+               
+               double offset = 0.0000;
+               vector<double> d;
+               vector<double> e;
+               vector<vector<double> > G = matrix;
+               vector<vector<double> > copy_G;
+                       
+               for(int count=0;count<2;count++){
+                       linearCalc.tred2(G, d, e);                              if (m->control_pressed) { return 0; }
+                       linearCalc.qtli(d, e, G);                               if (m->control_pressed) { return 0; }
+                       offset = d[d.size()-1];
+                       if(offset > 0.0) break;
+               } 
+               
+               if (m->control_pressed) { return 0; }
+               
+               string fbase = outputDir + m->getRootName(m->getSimpleName(inputFile));
+               string outputFileName = fbase + lookupFloat[0]->getLabel();
+               output(outputFileName, globaldata->Groups, G, d);
+               
+               if (metric) {   
+                       
+                       for (int i = 1; i < 4; i++) {
+                               
+                               vector< vector<double> > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file
+                               
+                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
+                               
+                               double corr = linearCalc.calcPearson(EuclidDists, matrix); //G is the pcoa file, D is the users distance matrix
+                               
+                               m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
+                               
+                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "process");        
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+
+void PCACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
+       try {
+               int rank = name_list.size();
+               double dsum = 0.0000;
+               for(int i=0;i<rank;i++){
+                       dsum += d[i];
+                       for(int j=0;j<rank;j++){
+                               if(d[j] >= 0)   {       G[i][j] *= pow(d[j],0.5);       }
+                               else                    {       G[i][j] = 0.00000;                      }
+                       }
+               }
+               
+               ofstream pcaData((fnameRoot+".pca").c_str(), ios::trunc);
+               pcaData.setf(ios::fixed, ios::floatfield);
+               pcaData.setf(ios::showpoint);   
+               outputNames.push_back(fnameRoot+".pca");
+               outputTypes["pca"].push_back(fnameRoot+".pca");
+               
+               ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc);
+               pcaLoadings.setf(ios::fixed, ios::floatfield);
+               pcaLoadings.setf(ios::showpoint);
+               outputNames.push_back(fnameRoot+".pca.loadings");
+               outputTypes["loadings"].push_back(fnameRoot+".pca.loadings");   
+               
+               pcaLoadings << "axis\tloading\n";
+               for(int i=0;i<rank;i++){
+                       pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
+               }
+               
+               pcaData << "group";
+               for(int i=0;i<rank;i++){
+                       pcaData << '\t' << "axis" << i+1;
+               }
+               pcaData << endl;
+               
+               for(int i=0;i<rank;i++){
+                       pcaData << name_list[i] << '\t';
+                       for(int j=0;j<rank;j++){
+                               pcaData << G[i][j] << '\t';
+                       }
+                       pcaData << endl;
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PCACommand", "output");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+
+
diff --git a/pcacommand.h b/pcacommand.h
new file mode 100644 (file)
index 0000000..cf4348c
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef PCACOMMAND_H
+#define PCACOMMAND_H
+
+/*
+ *  pcacommand.h
+ *  mothur
+ *
+ *  Created by westcott on 1/7/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "linearalgebra.h"
+#include "globaldata.hpp"
+
+
+/*****************************************************************/
+class PCACommand : public Command {
+       
+public:
+       PCACommand(string);     
+       PCACommand();
+       ~PCACommand();
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();  
+       void help();
+       
+private:
+       GlobalData* globaldata;
+       
+       bool abort, metric;
+       string outputDir, mode, inputFile, label, groups;
+       vector<string> outputNames, Groups;
+       set<string> labels;
+       map<string, vector<string> > outputTypes;
+       LinearAlgebra linearCalc;
+       
+       vector< vector<double> > createMatrix(vector<SharedRAbundFloatVector*>);
+       int process(vector<SharedRAbundFloatVector*>&);
+       void output(string, vector<string>, vector<vector<double> >&, vector<double>);
+       
+};
+
+/*****************************************************************/
+
+#endif
+
+
index 9375144bcb342faf364a38eee6eca63226099659..d1d919c93d5f43086658a86cf79f11a09de8ae9f 100644 (file)
@@ -30,7 +30,6 @@ PCOACommand::PCOACommand(){
                vector<string> tempOutNames;
                outputTypes["pcoa"] = tempOutNames;
                outputTypes["loadings"] = tempOutNames;
-               outputTypes["corr"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "PCOACommand", "PCOACommand");
@@ -102,7 +101,6 @@ PCOACommand::PCOACommand(string option)  {
                        vector<string> tempOutNames;
                        outputTypes["pcoa"] = tempOutNames;
                        outputTypes["loadings"] = tempOutNames;
-                       outputTypes["corr"] = tempOutNames;
                        
                        //required parameters
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
@@ -192,11 +190,11 @@ int PCOACommand::execute(){
                        
                        for (int i = 1; i < 4; i++) {
                                                        
-                               vector< vector<double> > EuclidDists = calculateEuclidianDistance(G, i); //G is the pcoa file
+                               vector< vector<double> > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file
                                
                                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
                                
-                               double corr = calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix
+                               double corr = linearCalc.calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix
                                
                                m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
                                
@@ -217,114 +215,6 @@ int PCOACommand::execute(){
        }
 }
 /*********************************************************************************************************************************/
-vector< vector<double> > PCOACommand::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
-       try {
-               //make square matrix
-               vector< vector<double> > dists; dists.resize(axes.size());
-               for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
-                       
-               if (dimensions == 1) { //one dimension calc = abs(x-y)
-                       
-                       for (int i = 0; i < dists.size(); i++) {
-                               
-                               if (m->control_pressed) { return dists; }
-                               
-                               for (int j = 0; j < i; j++) {
-                                       dists[i][j] = abs(axes[i][0] - axes[j][0]);
-                                       dists[j][i] = dists[i][j];
-                               }
-                       }
-                       
-               }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
-                       
-                       for (int i = 0; i < dists.size(); i++) {
-                               
-                               if (m->control_pressed) { return dists; }
-                               
-                               for (int j = 0; j < i; j++) {
-                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
-                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
-
-                                       dists[i][j] = sqrt((firstDim + secondDim));
-                                       dists[j][i] = dists[i][j];
-                               }
-                       }
-                       
-               }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
-                       
-                       for (int i = 0; i < dists.size(); i++) {
-                               
-                               if (m->control_pressed) { return dists; }
-                               
-                               for (int j = 0; j < i; j++) {
-                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
-                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
-                                       double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
-                                       
-                                       dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
-                                       dists[j][i] = dists[i][j];
-                               }
-                       }
-                       
-               }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
-               
-               return dists;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
-               exit(1);
-       }
-}
-/*********************************************************************************************************************************/
-double PCOACommand::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
-       try {
-               
-               //find average for - X
-               vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
-               for (int i = 0; i < euclidDists.size(); i++) {
-                       for (int j = 0; j < euclidDists[i].size(); j++) {
-                               averageEuclid[i] += euclidDists[i][j];  
-                       }
-               }
-               for (int i = 0; i < averageEuclid.size(); i++) {  averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size();   }
-               
-               //find average for - Y
-               vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
-               for (int i = 0; i < userDists.size(); i++) {
-                       for (int j = 0; j < userDists[i].size(); j++) {
-                               averageUser[i] += userDists[i][j];  
-                       }
-               }
-               for (int i = 0; i < averageUser.size(); i++) {  averageUser[i] = averageUser[i] / (float) userDists.size();  }
-               
-               double numerator = 0.0;
-               double denomTerm1 = 0.0;
-               double denomTerm2 = 0.0;
-               
-               for (int i = 0; i < euclidDists.size(); i++) {
-                       
-                       for (int k = 0; k < i; k++) {
-                               
-                               float Yi = userDists[i][k];
-                               float Xi = euclidDists[i][k];
-                                       
-                               numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
-                               denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
-                               denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
-                       }
-               }
-               
-               double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
-               double r = numerator / denom;
-               
-               return r;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
-               exit(1);
-       }
-}
-/*********************************************************************************************************************************/
 
 void PCOACommand::get_comment(istream& f, char begin, char end){
        try {
index 8bb7c1bdc586798b70fb3ec3f601bf13b5a16d9e..c62b3d668c9a38287769476a598033544d9790a3 100644 (file)
@@ -42,8 +42,6 @@ private:
        void read(string, vector<string>&, vector<vector<double> >&);
        void recenter(double, vector<vector<double> >, vector<vector<double> >&);
        void output(string, vector<string>, vector<vector<double> >&, vector<double>);
-       vector< vector<double> > calculateEuclidianDistance(vector<vector<double> >&, int);
-       double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
        
 };
        
index 52ffbb89b3137cf724ac9ae1adc501f85c2dbe4e..d65cc204cc97bd43941928bad593b515522625e6 100644 (file)
@@ -288,17 +288,21 @@ vector<SharedRAbundFloatVector*> SharedRAbundFloatVector::getSharedRAbundFloatVe
                util = new SharedUtil();
                
                util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
-
+               
+               bool remove = false;
                for (int i = 0; i < lookup.size(); i++) {
                        //if this sharedrabund is not from a group the user wants then delete it.
                        if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { 
                                delete lookup[i]; lookup[i] = NULL;
                                lookup.erase(lookup.begin()+i); 
                                i--; 
+                               remove = true;
                        }
                }
                
                delete util;
+               
+               if (remove) { eliminateZeroOTUS(lookup); }
        
                return lookup;
        }
@@ -409,6 +413,47 @@ OrderVector SharedRAbundFloatVector::getOrderVector(map<string,int>* nameMap = N
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+int SharedRAbundFloatVector::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
+       try {
+               
+               vector<SharedRAbundFloatVector*> newLookup;
+               for (int i = 0; i < thislookup.size(); i++) {
+                       SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+                       temp->setLabel(thislookup[i]->getLabel());
+                       temp->setGroup(thislookup[i]->getGroup());
+                       newLookup.push_back(temp);
+               }
+               
+               //for each bin
+               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
+                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                       
+                       //look at each sharedRabund and make sure they are not all zero
+                       bool allZero = true;
+                       for (int j = 0; j < thislookup.size(); j++) {
+                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
+                       }
+                       
+                       //if they are not all zero add this bin
+                       if (!allZero) {
+                               for (int j = 0; j < thislookup.size(); j++) {
+                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
+                               }
+                       }
+               }
+               
+               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
+               
+               thislookup = newLookup;
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedRAbundFloatVector", "eliminateZeroOTUS");
+               exit(1);
+       }
+}
 /***********************************************************************/
 
index a3407f48ec39aabed942dff22b91d4691c992539..b0d84916134478516504eba4d583a58d00ac5be0 100644 (file)
@@ -75,6 +75,8 @@ private:
        float numSeqs;
        string group;
        int index;      
+       
+       int eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&);
 };
 
 
index 9276b7bfdc0ae441a883f42ddd0aabcddf1f04bd..a050ab3fe71998b5a8a6abf2b34b0e67f0083808 100644 (file)
@@ -70,8 +70,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
                string holdLabel, nextLabel, groupN;
                individual newguy;
                
-               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
-               lookup.clear();
+               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  lookup.clear();
                
                if (globaldata->saveNextLabel == "") {  f >> label;  }
                else { label = globaldata->saveNextLabel; }
@@ -372,10 +371,12 @@ vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
                util = new SharedUtil();
                
                util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
-
+               
+               bool remove = false;
                for (int i = 0; i < lookup.size(); i++) {
                        //if this sharedrabund is not from a group the user wants then delete it.
                        if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { 
+                               remove = true;
                                delete lookup[i]; lookup[i] = NULL;
                                lookup.erase(lookup.begin()+i); 
                                i--; 
@@ -383,6 +384,8 @@ vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
                }
                
                delete util;
+               
+               if (remove) { eliminateZeroOTUS(lookup); }
        
                return lookup;
        }
@@ -391,6 +394,81 @@ vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
+               try {
+                       
+                       vector<SharedRAbundVector*> newLookup;
+                       for (int i = 0; i < thislookup.size(); i++) {
+                               SharedRAbundVector* temp = new SharedRAbundVector();
+                               temp->setLabel(thislookup[i]->getLabel());
+                               temp->setGroup(thislookup[i]->getGroup());
+                               newLookup.push_back(temp);
+                       }
+                       
+                       //for each bin
+                       for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
+                               if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                               
+                               //look at each sharedRabund and make sure they are not all zero
+                               bool allZero = true;
+                               for (int j = 0; j < thislookup.size(); j++) {
+                                       if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
+                               }
+                               
+                               //if they are not all zero add this bin
+                               if (!allZero) {
+                                       for (int j = 0; j < thislookup.size(); j++) {
+                                               newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
+                                       }
+                               }
+                       }
+                       
+                       for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
+                       
+                       thislookup = newLookup;
+                       
+                       return 0;
+                       
+               }
+               catch(exception& e) {
+                       m->errorOut(e, "SharedRAbundVector", "eliminateZeroOTUS");
+                       exit(1);
+               }
+       }
+       
+/***********************************************************************/
+vector<SharedRAbundFloatVector*> SharedRAbundVector::getSharedRAbundFloatVectors(vector<SharedRAbundVector*> thislookup){
+       try {
+               vector<SharedRAbundFloatVector*> newLookupFloat;        
+               for (int i = 0; i < lookup.size(); i++) {
+                       SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+                       temp->setLabel(thislookup[i]->getLabel());
+                       temp->setGroup(thislookup[i]->getGroup());
+                       newLookupFloat.push_back(temp);
+               }
+               
+               for (int i = 0; i < thislookup.size(); i++) {
+                       
+                       for (int j = 0; j < thislookup[i]->getNumBins(); j++) {
+                               
+                               if (m->control_pressed) { return newLookupFloat; }
+                               
+                               int abund = thislookup[i]->getAbundance(j);
+                               
+                               float relabund = abund / (float) thislookup[i]->getNumSeqs();
+                               
+                               newLookupFloat[i]->push_back(relabund, thislookup[i]->getGroup());
+                       }
+               }
+               
+               return newLookupFloat;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 RAbundVector SharedRAbundVector::getRAbundVector() {
index 6584d1d2e9198cb5fed6ebc3d43e9d35f608da00..beece8e14a1082d7347a62fdc0e95ebcd24a9cc2 100644 (file)
@@ -13,6 +13,7 @@
 #include "datavector.hpp"
 #include "sharedordervector.h"
 #include "sharedsabundvector.h"
+#include "sharedrabundfloatvector.h"
 #include "rabundvector.hpp"
 #include "groupmap.h"
 
@@ -69,6 +70,7 @@ public:
        SharedSAbundVector getSharedSAbundVector();
        SharedRAbundVector getSharedRAbundVector();
        vector<SharedRAbundVector*> getSharedRAbundVectors();
+       vector<SharedRAbundFloatVector*> getSharedRAbundFloatVectors(vector<SharedRAbundVector*>);
        
 private:
        vector<individual>  data; 
@@ -80,6 +82,8 @@ private:
        int numSeqs;
        string group;
        int index;      
+       
+       int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
 };