From: westcott Date: Mon, 10 Jan 2011 17:39:51 +0000 (+0000) Subject: pca command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=37eac2026d91179acda0494c4dcca22f176551b9 pca command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 20b0e48..c59979f 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -281,6 +281,7 @@ A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; }; A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; }; A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC480D12D788F20055BC5C /* linearalgebra.cpp */; }; + A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC486612D795D60055BC5C /* pcacommand.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -863,6 +864,8 @@ A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = ""; }; A7FC480C12D788F20055BC5C /* linearalgebra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = linearalgebra.h; sourceTree = ""; }; A7FC480D12D788F20055BC5C /* linearalgebra.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = linearalgebra.cpp; sourceTree = ""; }; + A7FC486512D795D60055BC5C /* pcacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcacommand.h; sourceTree = ""; }; + A7FC486612D795D60055BC5C /* pcacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pcacommand.cpp; sourceTree = ""; }; C6A0FF2C0290799A04C91782 /* mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mothur.1; sourceTree = ""; }; /* End PBXFileReference section */ @@ -1115,6 +1118,8 @@ A7E9B78212D37EC400DA6239 /* parselistscommand.h */, A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */, A7E9B78612D37EC400DA6239 /* parsimonycommand.h */, + A7FC486512D795D60055BC5C /* pcacommand.h */, + A7FC486612D795D60055BC5C /* pcacommand.cpp */, A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */, A7E9B78812D37EC400DA6239 /* pcoacommand.h */, A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */, @@ -1862,6 +1867,7 @@ A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */, A70332B712D3A13400761E33 /* makefile in Sources */, A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */, + A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index e3eb2b2..f581b6d 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -105,6 +105,7 @@ #include "trimflowscommand.h" #include "corraxescommand.h" #include "shhhercommand.h" +#include "pcacommand.h" /*******************************************************/ @@ -213,6 +214,7 @@ CommandFactory::CommandFactory(){ commands["indicator"] = "indicator"; commands["consensus.seqs"] = "consensus.seqs"; commands["corr.axes"] = "corr.axes"; + commands["pca"] = "pca"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -333,7 +335,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); } else if(commandName == "mgcluster") { command = new MGClusterCommand(optionString); } else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); } - else if(commandName == "pcoa") { command = new PCOACommand(optionString); } + else if(commandName == "pcoa") { command = new PCOACommand(optionString); } + else if(commandName == "pca") { command = new PCACommand(optionString); } else if(commandName == "otu.hierarchy") { command = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); } @@ -457,7 +460,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "phylotype") { pipecommand = new PhylotypeCommand(optionString); } else if(commandName == "mgcluster") { pipecommand = new MGClusterCommand(optionString); } else if(commandName == "pre.cluster") { pipecommand = new PreClusterCommand(optionString); } - else if(commandName == "pcoa") { pipecommand = new PCOACommand(optionString); } + else if(commandName == "pcoa") { pipecommand = new PCOACommand(optionString); } + else if(commandName == "pca") { pipecommand = new PCACommand(optionString); } else if(commandName == "otu.hierarchy") { pipecommand = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { pipecommand = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { pipecommand = new SetLogFileCommand(optionString); } @@ -569,6 +573,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "mgcluster") { shellcommand = new MGClusterCommand(); } else if(commandName == "pre.cluster") { shellcommand = new PreClusterCommand(); } else if(commandName == "pcoa") { shellcommand = new PCOACommand(); } + else if(commandName == "pca") { shellcommand = new PCACommand(); } else if(commandName == "otu.hierarchy") { shellcommand = new OtuHierarchyCommand(); } else if(commandName == "set.dir") { shellcommand = new SetDirectoryCommand(); } else if(commandName == "set.logfile") { shellcommand = new SetLogFileCommand(); } diff --git a/corraxescommand.cpp b/corraxescommand.cpp index 9ffc4b6..0d421b5 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -222,22 +222,21 @@ int CorrAxesCommand::execute(){ // use smart distancing to get right sharedRabund and convert to relabund if needed // /************************************************************************************/ if (sharedfile != "") { - getShared(); - if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } - if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; } + InputData* input = new InputData(sharedfile, "sharedfile"); + getSharedFloat(input); + delete input; - //fills lookupFloat with relative abundance values from lookup - convertToRelabund(); - - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } + if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; } }else if (relabundfile != "") { - getSharedFloat(); + InputData* input = new InputData(relabundfile, "relabund"); + getSharedFloat(input); + delete input; + if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; } - if (pickedGroups) { eliminateZeroOTUS(lookupFloat); } - }else if (metadatafile != "") { getMetadata(); //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } return 0; } @@ -612,89 +611,12 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou } } //********************************************************************************************************************** -int CorrAxesCommand::getShared(){ +int CorrAxesCommand::getSharedFloat(InputData* input){ try { - InputData* input = new InputData(sharedfile, "sharedfile"); - lookup = input->getSharedRAbundVectors(); - string lastLabel = lookup[0]->getLabel(); - - if (label == "") { label = lastLabel; delete input; return 0; } - - //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. - set labels; labels.insert(label); - set processedLabels; - set userLabels = labels; - - //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && (userLabels.size() != 0)) { - if (m->control_pressed) { delete input; return 0; } - - if(labels.count(lookup[0]->getLabel()) == 1){ - processedLabels.insert(lookup[0]->getLabel()); - userLabels.erase(lookup[0]->getLabel()); - break; - } - - if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - string saveLabel = lookup[0]->getLabel(); - - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - lookup = input->getSharedRAbundVectors(lastLabel); - - processedLabels.insert(lookup[0]->getLabel()); - userLabels.erase(lookup[0]->getLabel()); - - //restore real lastlabel to save below - lookup[0]->setLabel(saveLabel); - break; - } - - lastLabel = lookup[0]->getLabel(); - - //get next line to process - //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - lookup = input->getSharedRAbundVectors(); - } - - - if (m->control_pressed) { delete input; return 0; } - - //output error messages about any remaining user labels - set::iterator it; - bool needToRun = false; - for (it = userLabels.begin(); it != userLabels.end(); it++) { - m->mothurOut("Your file does not include the label " + *it); - if (processedLabels.count(lastLabel) != 1) { - m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); - needToRun = true; - }else { - m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); - } - } - - //run last label if you need to - if (needToRun == true) { - for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } - lookup = input->getSharedRAbundVectors(lastLabel); - } - - delete input; - return 0; - } - catch(exception& e) { - m->errorOut(e, "CorrAxesCommand", "getShared"); - exit(1); - } -} -//********************************************************************************************************************** -int CorrAxesCommand::getSharedFloat(){ - try { - InputData* input = new InputData(relabundfile, "relabund"); lookupFloat = input->getSharedRAbundFloatVectors(); string lastLabel = lookupFloat[0]->getLabel(); - if (label == "") { label = lastLabel; delete input; return 0; } + if (label == "") { label = lastLabel; return 0; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); @@ -704,7 +626,7 @@ int CorrAxesCommand::getSharedFloat(){ //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) { - if (m->control_pressed) { delete input; return 0; } + if (m->control_pressed) { return 0; } if(labels.count(lookupFloat[0]->getLabel()) == 1){ processedLabels.insert(lookupFloat[0]->getLabel()); @@ -735,7 +657,7 @@ int CorrAxesCommand::getSharedFloat(){ } - if (m->control_pressed) { delete input; return 0; } + if (m->control_pressed) { return 0; } //output error messages about any remaining user labels set::iterator it; @@ -756,7 +678,6 @@ int CorrAxesCommand::getSharedFloat(){ lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); } - delete input; return 0; } catch(exception& e) { @@ -764,43 +685,6 @@ int CorrAxesCommand::getSharedFloat(){ exit(1); } } -/*****************************************************************/ -int CorrAxesCommand::convertToRelabund(){ - try { - - vector newLookup; - for (int i = 0; i < lookup.size(); i++) { - SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); - temp->setLabel(lookup[i]->getLabel()); - temp->setGroup(lookup[i]->getGroup()); - newLookup.push_back(temp); - } - - for (int i = 0; i < lookup.size(); i++) { - - for (int j = 0; j < lookup[i]->getNumBins(); j++) { - - if (m->control_pressed) { return 0; } - - int abund = lookup[i]->getAbundance(j); - - float relabund = abund / (float) lookup[i]->getNumSeqs(); - - newLookup[i]->push_back(relabund, lookup[i]->getGroup()); - } - } - - if (pickedGroups) { eliminateZeroOTUS(newLookup); } - - lookupFloat = newLookup; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "CorrAxesCommand", "convertToRelabund"); - exit(1); - } -} //********************************************************************************************************************** int CorrAxesCommand::eliminateZeroOTUS(vector& thislookup) { try { diff --git a/corraxescommand.h b/corraxescommand.h index e02c52a..8547b0f 100644 --- a/corraxescommand.h +++ b/corraxescommand.h @@ -11,7 +11,6 @@ */ #include "command.hpp" -#include "sharedrabundvector.h" #include "sharedrabundfloatvector.h" #include "inputdata.h" @@ -47,14 +46,11 @@ private: vector outputNames, Groups; map > outputTypes; - vector lookup; vector lookupFloat; vector metadataLabels; - int getShared(); - int getSharedFloat(); + int getSharedFloat(InputData*); int getMetadata(); - int convertToRelabund(); int eliminateZeroOTUS(vector&); map > readAxes(); int calcPearson(map >&, ofstream&); diff --git a/datavector.hpp b/datavector.hpp index a5f96eb..3ecd407 100644 --- a/datavector.hpp +++ b/datavector.hpp @@ -16,6 +16,7 @@ class SharedListVector; class SharedOrderVector; class SharedSAbundVector; class SharedRAbundVector; +class SharedRAbundFloatVector; class DataVector { diff --git a/getrelabundcommand.cpp b/getrelabundcommand.cpp index 8c28c47..08e9228 100644 --- a/getrelabundcommand.cpp +++ b/getrelabundcommand.cpp @@ -275,8 +275,6 @@ int GetRelAbundCommand::execute(){ int GetRelAbundCommand::getRelAbundance(vector& thisLookUp, ofstream& out){ try { - if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } - for (int i = 0; i < thisLookUp.size(); i++) { out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t'; @@ -320,48 +318,5 @@ int GetRelAbundCommand::getRelAbundance(vector& thisLookUp, } } //********************************************************************************************************************** -int GetRelAbundCommand::eliminateZeroOTUS(vector& thislookup) { - try { - - vector newLookup; - for (int i = 0; i < thislookup.size(); i++) { - SharedRAbundVector* temp = new SharedRAbundVector(); - temp->setLabel(thislookup[i]->getLabel()); - temp->setGroup(thislookup[i]->getGroup()); - newLookup.push_back(temp); - } - - //for each bin - for (int i = 0; i < thislookup[0]->getNumBins(); i++) { - if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } - - //look at each sharedRabund and make sure they are not all zero - bool allZero = true; - for (int j = 0; j < thislookup.size(); j++) { - if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } - } - - //if they are not all zero add this bin - if (!allZero) { - for (int j = 0; j < thislookup.size(); j++) { - newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); - } - } - } - - for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } - - thislookup = newLookup; - - return 0; - - } - catch(exception& e) { - m->errorOut(e, "GetRelAbundCommand", "eliminateZeroOTUS"); - exit(1); - } -} - -//********************************************************************************************************************** diff --git a/getrelabundcommand.h b/getrelabundcommand.h index 3a5f6ad..90d5f9a 100644 --- a/getrelabundcommand.h +++ b/getrelabundcommand.h @@ -43,7 +43,6 @@ private: map > outputTypes; int getRelAbundance(vector&, ofstream&); - int eliminateZeroOTUS(vector& thislookup); }; diff --git a/inputdata.cpp b/inputdata.cpp index 5d15427..283fb7e 100644 --- a/inputdata.cpp +++ b/inputdata.cpp @@ -519,6 +519,15 @@ vector InputData::getSharedRAbundFloatVectors(){ if (SharedRelAbund != NULL) { return SharedRelAbund->getSharedRAbundFloatVectors(); } + }else if (format == "sharedfile") { + SharedRAbundVector* SharedRAbund = new SharedRAbundVector(fileHandle); + if (SharedRAbund != NULL) { + vector lookup = SharedRAbund->getSharedRAbundVectors(); + vector lookupFloat = SharedRAbund->getSharedRAbundFloatVectors(lookup); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup.clear(); + return lookupFloat; + } + } m->gobble(fileHandle); } @@ -560,8 +569,32 @@ vector InputData::getSharedRAbundFloatVectors(string l }else{ break; } m->gobble(in); } - } + }else if (format == "sharedfile") { + while (in.eof() != true) { + + SharedRAbundVector* SharedRAbund = new SharedRAbundVector(in); + if (SharedRAbund != NULL) { + thisLabel = SharedRAbund->getLabel(); + + //if you are at the last label + if (thisLabel == label) { + in.close(); + vector lookup = SharedRAbund->getSharedRAbundVectors(); + vector lookupFloat = SharedRAbund->getSharedRAbundFloatVectors(lookup); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup.clear(); + return lookupFloat; + }else { + //so you don't loose this memory + vector lookup = SharedRAbund->getSharedRAbundVectors(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup.clear(); + delete SharedRAbund; + } + }else{ break; } + m->gobble(in); + } + } } + //this is created to signal to calling function that the input file is at eof vector null; null.push_back(NULL); diff --git a/linearalgebra.cpp b/linearalgebra.cpp index e015b5f..5de5a71 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -27,7 +27,7 @@ vector > LinearAlgebra::matrix_mult(vector > first product.resize(first_rows); for(int i=0;i& d, vector& e, vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes, int dimensions){ + try { + //make square matrix + vector< vector > dists; dists.resize(axes.size()); + for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); } + + if (dimensions == 1) { //one dimension calc = abs(x-y) + + for (int i = 0; i < dists.size(); i++) { + + if (m->control_pressed) { return dists; } + + for (int j = 0; j < i; j++) { + dists[i][j] = abs(axes[i][0] - axes[j][0]); + dists[j][i] = dists[i][j]; + } + } + + }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2) + + for (int i = 0; i < dists.size(); i++) { + + if (m->control_pressed) { return dists; } + + for (int j = 0; j < i; j++) { + double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); + double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); + + dists[i][j] = sqrt((firstDim + secondDim)); + dists[j][i] = dists[i][j]; + } + } + + }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2) + + for (int i = 0; i < dists.size(); i++) { + + if (m->control_pressed) { return dists; } + + for (int j = 0; j < i; j++) { + double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); + double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); + double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2])); + + dists[i][j] = sqrt((firstDim + secondDim + thirdDim)); + dists[j][i] = dists[i][j]; + } + } + + }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + + return dists; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); + exit(1); + } +} +/*********************************************************************************************************************************/ +double LinearAlgebra::calcPearson(vector< vector >& euclidDists, vector< vector >& userDists){ + try { + + //find average for - X + vector averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0); + for (int i = 0; i < euclidDists.size(); i++) { + for (int j = 0; j < euclidDists[i].size(); j++) { + averageEuclid[i] += euclidDists[i][j]; + } + } + for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); } + + //find average for - Y + vector averageUser; averageUser.resize(userDists.size(), 0.0); + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < userDists[i].size(); j++) { + averageUser[i] += userDists[i][j]; + } + } + for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); } + + double numerator = 0.0; + double denomTerm1 = 0.0; + double denomTerm2 = 0.0; + + for (int i = 0; i < euclidDists.size(); i++) { + + for (int k = 0; k < i; k++) { + + float Yi = userDists[i][k]; + float Xi = euclidDists[i][k]; + + numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k])); + denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k])); + denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k])); + } + } + + double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); + double r = numerator / denom; + + return r; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); + exit(1); + } +} +/*********************************************************************************************************************************/ diff --git a/linearalgebra.h b/linearalgebra.h index 2acf5cb..2866493 100644 --- a/linearalgebra.h +++ b/linearalgebra.h @@ -21,7 +21,8 @@ public: vector > matrix_mult(vector >, vector >); int tred2(vector >&, vector&, vector&); int qtli(vector&, vector&, vector >&); - + vector< vector > calculateEuclidianDistance(vector >&, int); + double calcPearson(vector >&, vector >&); private: MothurOut* m; diff --git a/metastatscommand.cpp b/metastatscommand.cpp index a734ce3..81715e0 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -347,7 +347,6 @@ int MetaStatsCommand::execute(){ int MetaStatsCommand::process(vector& thisLookUp){ try { - if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ @@ -466,45 +465,3 @@ int MetaStatsCommand::driver(int start, int num, vector& th } } //********************************************************************************************************************** -int MetaStatsCommand::eliminateZeroOTUS(vector& thislookup) { - try { - - vector newLookup; - for (int i = 0; i < thislookup.size(); i++) { - SharedRAbundVector* temp = new SharedRAbundVector(); - temp->setLabel(thislookup[i]->getLabel()); - temp->setGroup(thislookup[i]->getGroup()); - newLookup.push_back(temp); - } - - //for each bin - for (int i = 0; i < thislookup[0]->getNumBins(); i++) { - if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } - - //look at each sharedRabund and make sure they are not all zero - bool allZero = true; - for (int j = 0; j < thislookup.size(); j++) { - if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } - } - - //if they are not all zero add this bin - if (!allZero) { - for (int j = 0; j < thislookup.size(); j++) { - newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); - } - } - } - - for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } - - thislookup = newLookup; - - return 0; - - } - catch(exception& e) { - m->errorOut(e, "MetaStatsCommand", "eliminateZeroOTUS"); - exit(1); - } -} -//********************************************************************************************************************** diff --git a/metastatscommand.h b/metastatscommand.h index a38f3a7..8c633b5 100644 --- a/metastatscommand.h +++ b/metastatscommand.h @@ -54,7 +54,6 @@ private: float threshold; int process(vector&); - int eliminateZeroOTUS(vector&); int driver(int, int, vector&); }; diff --git a/pcacommand.cpp b/pcacommand.cpp new file mode 100644 index 0000000..0842c53 --- /dev/null +++ b/pcacommand.cpp @@ -0,0 +1,405 @@ +/* + * pcacommand.cpp + * mothur + * + * Created by westcott on 1/7/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "pcacommand.h" +#include "inputdata.h" + +//********************************************************************************************************************** +vector PCACommand::getValidParameters(){ + try { + string Array[] = {"label", "groups","metric","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +PCACommand::PCACommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["pca"] = tempOutNames; + outputTypes["loadings"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "PCACommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector PCACommand::getRequiredParameters(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector PCACommand::getRequiredFiles(){ + try { + string Array[] = {"shared","relabund","or"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** + +PCACommand::PCACommand(string option) { + try { + abort = false; + + globaldata = GlobalData::getInstance(); + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"label","groups","metric","outputdir", "inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser. getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } + + //initialize outputTypes + vector tempOutNames; + outputTypes["pca"] = tempOutNames; + outputTypes["loadings"] = tempOutNames; + + //make sure the user has already run the read.otu command + if ((globaldata->getSharedFile() == "") && (globaldata->getRelAbundFile() == "")) { + m->mothurOut("You must read a list and a group, shared or relabund file before you can use the pca command."); m->mothurOutEndLine(); abort = true; + } + + if (globaldata->getSharedFile() != "") { mode = "shared"; inputFile = globaldata->getSharedFile(); } + if (globaldata->getRelAbundFile() != "") { mode = "relabund"; inputFile = globaldata->getRelAbundFile(); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += m->hasPath(inputFile); //if user entered a file with a path then preserve it + } + + string temp = validParameter.validFile(parameters, "metric", false); if (temp == "not found"){ temp = "T"; } + metric = m->isTrue(temp); + + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); } + else { m->splitAtDash(label, labels); } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { m->splitAtDash(groups, Groups); } + globaldata->Groups = Groups; + + } + + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "PCACommand"); + exit(1); + } +} +//********************************************************************************************************************** +void PCACommand::help(){ + try { + m->mothurOut("The pca command can only be run after a successful read.otu command."); m->mothurOutEndLine(); + m->mothurOut("The pca command parameters are label, groups and metric. No parameters are required."); m->mothurOutEndLine(); + m->mothurOut("The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multpile labels may be separated by dashes.\n"); + m->mothurOut("The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n"); + m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine(); + m->mothurOut("Example pca(groups=yourGroups).\n"); + m->mothurOut("Example pca(groups=A-B-C).\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "help"); + exit(1); + } +} +//********************************************************************************************************************** +PCACommand::~PCACommand(){} +//********************************************************************************************************************** +int PCACommand::execute(){ + try { + + if (abort == true) { return 0; } + + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + cerr.setf(ios::fixed, ios::floatfield); + cerr.setf(ios::showpoint); + + //get first line of shared file + vector< vector > matrix; + InputData* input; + if (mode == "shared") { + input = new InputData(inputFile, "sharedfile"); + }else if (mode == "relabund") { + input = new InputData(inputFile, "relabund"); + }else { m->mothurOut("[ERROR]: filetype not recognized."); m->mothurOutEndLine(); return 0; } + + vector lookupFloat = input->getSharedRAbundFloatVectors(); + string lastLabel = lookupFloat[0]->getLabel(); + + set processedLabels; + set userLabels = labels; + + //if the user gave no labels, then use the first one read + if (labels.size() == 0) { + label = lastLabel; + + process(lookupFloat); + } + + //as long as you are not at the end of the file or done wih the lines you want + while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) { + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } lookupFloat.clear(); return 0; } + + if(labels.count(lookupFloat[0]->getLabel()) == 1){ + processedLabels.insert(lookupFloat[0]->getLabel()); + userLabels.erase(lookupFloat[0]->getLabel()); + + process(lookupFloat); + } + + if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookupFloat[0]->getLabel(); + + for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } lookupFloat.clear(); + lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); + + process(lookupFloat); + + processedLabels.insert(lookupFloat[0]->getLabel()); + userLabels.erase(lookupFloat[0]->getLabel()); + + //restore real lastlabel to save below + lookupFloat[0]->setLabel(saveLabel); + } + + lastLabel = lookupFloat[0]->getLabel(); + + //get next line to process + //prevent memory leak + for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } lookupFloat.clear(); + lookupFloat = input->getSharedRAbundFloatVectors(); + } + + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } lookupFloat.clear(); return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } } lookupFloat.clear(); + lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); + + process(lookupFloat); + + for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } } lookupFloat.clear(); + } + + for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } } lookupFloat.clear(); + delete input; + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +vector< vector > PCACommand::createMatrix(vector lookupFloat){ + try { + vector< vector > matrix; matrix.resize(lookupFloat.size()); + + //fill matrix with shared files relative abundances + for (int i = 0; i < lookupFloat.size(); i++) { + for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) { + matrix[i].push_back(lookupFloat[i]->getAbundance(j)); + } + } + + vector< vector > transposeMatrix; transposeMatrix.resize(matrix[0].size()); + for (int i = 0; i < transposeMatrix.size(); i++) { + for (int j = 0; j < matrix.size(); j++) { + transposeMatrix[i].push_back(matrix[j][i]); + } + } + + matrix = linearCalc.matrix_mult(matrix, transposeMatrix); + + return matrix; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "createMatrix"); + exit(1); + } +} +//********************************************************************************************************************** +int PCACommand::process(vector& lookupFloat){ + try { + m->mothurOut("\nProcessing " + lookupFloat[0]->getLabel()); m->mothurOutEndLine(); + + vector< vector > matrix; matrix.resize(lookupFloat.size()); + + //fill matrix with shared files relative abundances + for (int i = 0; i < lookupFloat.size(); i++) { + for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) { + matrix[i].push_back(lookupFloat[i]->getAbundance(j)); + } + } + + vector< vector > transposeMatrix; transposeMatrix.resize(matrix[0].size()); + for (int i = 0; i < transposeMatrix.size(); i++) { + for (int j = 0; j < matrix.size(); j++) { + transposeMatrix[i].push_back(matrix[j][i]); + } + } + + matrix = linearCalc.matrix_mult(matrix, transposeMatrix); + + double offset = 0.0000; + vector d; + vector e; + vector > G = matrix; + vector > copy_G; + + for(int count=0;count<2;count++){ + linearCalc.tred2(G, d, e); if (m->control_pressed) { return 0; } + linearCalc.qtli(d, e, G); if (m->control_pressed) { return 0; } + offset = d[d.size()-1]; + if(offset > 0.0) break; + } + + if (m->control_pressed) { return 0; } + + string fbase = outputDir + m->getRootName(m->getSimpleName(inputFile)); + string outputFileName = fbase + lookupFloat[0]->getLabel(); + output(outputFileName, globaldata->Groups, G, d); + + if (metric) { + + for (int i = 1; i < 4; i++) { + + vector< vector > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + double corr = linearCalc.calcPearson(EuclidDists, matrix); //G is the pcoa file, D is the users distance matrix + + m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine(); + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "PCACommand", "process"); + exit(1); + } +} +/*********************************************************************************************************************************/ + +void PCACommand::output(string fnameRoot, vector name_list, vector >& G, vector d) { + try { + int rank = name_list.size(); + double dsum = 0.0000; + for(int i=0;i= 0) { G[i][j] *= pow(d[j],0.5); } + else { G[i][j] = 0.00000; } + } + } + + ofstream pcaData((fnameRoot+".pca").c_str(), ios::trunc); + pcaData.setf(ios::fixed, ios::floatfield); + pcaData.setf(ios::showpoint); + outputNames.push_back(fnameRoot+".pca"); + outputTypes["pca"].push_back(fnameRoot+".pca"); + + ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc); + pcaLoadings.setf(ios::fixed, ios::floatfield); + pcaLoadings.setf(ios::showpoint); + outputNames.push_back(fnameRoot+".pca.loadings"); + outputTypes["loadings"].push_back(fnameRoot+".pca.loadings"); + + pcaLoadings << "axis\tloading\n"; + for(int i=0;ierrorOut(e, "PCACommand", "output"); + exit(1); + } +} +/*********************************************************************************************************************************/ + + diff --git a/pcacommand.h b/pcacommand.h new file mode 100644 index 0000000..cf4348c --- /dev/null +++ b/pcacommand.h @@ -0,0 +1,52 @@ +#ifndef PCACOMMAND_H +#define PCACOMMAND_H + +/* + * pcacommand.h + * mothur + * + * Created by westcott on 1/7/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "linearalgebra.h" +#include "globaldata.hpp" + + +/*****************************************************************/ +class PCACommand : public Command { + +public: + PCACommand(string); + PCACommand(); + ~PCACommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + GlobalData* globaldata; + + bool abort, metric; + string outputDir, mode, inputFile, label, groups; + vector outputNames, Groups; + set labels; + map > outputTypes; + LinearAlgebra linearCalc; + + vector< vector > createMatrix(vector); + int process(vector&); + void output(string, vector, vector >&, vector); + +}; + +/*****************************************************************/ + +#endif + + diff --git a/pcoacommand.cpp b/pcoacommand.cpp index 9375144..d1d919c 100644 --- a/pcoacommand.cpp +++ b/pcoacommand.cpp @@ -30,7 +30,6 @@ PCOACommand::PCOACommand(){ vector tempOutNames; outputTypes["pcoa"] = tempOutNames; outputTypes["loadings"] = tempOutNames; - outputTypes["corr"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "PCOACommand", "PCOACommand"); @@ -102,7 +101,6 @@ PCOACommand::PCOACommand(string option) { vector tempOutNames; outputTypes["pcoa"] = tempOutNames; outputTypes["loadings"] = tempOutNames; - outputTypes["corr"] = tempOutNames; //required parameters phylipfile = validParameter.validFile(parameters, "phylip", true); @@ -192,11 +190,11 @@ int PCOACommand::execute(){ for (int i = 1; i < 4; i++) { - vector< vector > EuclidDists = calculateEuclidianDistance(G, i); //G is the pcoa file + vector< vector > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - double corr = calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix + double corr = linearCalc.calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine(); @@ -217,114 +215,6 @@ int PCOACommand::execute(){ } } /*********************************************************************************************************************************/ -vector< vector > PCOACommand::calculateEuclidianDistance(vector< vector >& axes, int dimensions){ - try { - //make square matrix - vector< vector > dists; dists.resize(axes.size()); - for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); } - - if (dimensions == 1) { //one dimension calc = abs(x-y) - - for (int i = 0; i < dists.size(); i++) { - - if (m->control_pressed) { return dists; } - - for (int j = 0; j < i; j++) { - dists[i][j] = abs(axes[i][0] - axes[j][0]); - dists[j][i] = dists[i][j]; - } - } - - }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2) - - for (int i = 0; i < dists.size(); i++) { - - if (m->control_pressed) { return dists; } - - for (int j = 0; j < i; j++) { - double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); - double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); - - dists[i][j] = sqrt((firstDim + secondDim)); - dists[j][i] = dists[i][j]; - } - } - - }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2) - - for (int i = 0; i < dists.size(); i++) { - - if (m->control_pressed) { return dists; } - - for (int j = 0; j < i; j++) { - double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); - double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); - double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2])); - - dists[i][j] = sqrt((firstDim + secondDim + thirdDim)); - dists[j][i] = dists[i][j]; - } - } - - }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } - - return dists; - } - catch(exception& e) { - m->errorOut(e, "PCOACommand", "calculateEuclidianDistance"); - exit(1); - } -} -/*********************************************************************************************************************************/ -double PCOACommand::calcPearson(vector< vector >& euclidDists, vector< vector >& userDists){ - try { - - //find average for - X - vector averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0); - for (int i = 0; i < euclidDists.size(); i++) { - for (int j = 0; j < euclidDists[i].size(); j++) { - averageEuclid[i] += euclidDists[i][j]; - } - } - for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); } - - //find average for - Y - vector averageUser; averageUser.resize(userDists.size(), 0.0); - for (int i = 0; i < userDists.size(); i++) { - for (int j = 0; j < userDists[i].size(); j++) { - averageUser[i] += userDists[i][j]; - } - } - for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); } - - double numerator = 0.0; - double denomTerm1 = 0.0; - double denomTerm2 = 0.0; - - for (int i = 0; i < euclidDists.size(); i++) { - - for (int k = 0; k < i; k++) { - - float Yi = userDists[i][k]; - float Xi = euclidDists[i][k]; - - numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k])); - denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k])); - denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k])); - } - } - - double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); - double r = numerator / denom; - - return r; - } - catch(exception& e) { - m->errorOut(e, "PCOACommand", "calculateEuclidianDistance"); - exit(1); - } -} -/*********************************************************************************************************************************/ void PCOACommand::get_comment(istream& f, char begin, char end){ try { diff --git a/pcoacommand.h b/pcoacommand.h index 8bb7c1b..c62b3d6 100644 --- a/pcoacommand.h +++ b/pcoacommand.h @@ -42,8 +42,6 @@ private: void read(string, vector&, vector >&); void recenter(double, vector >, vector >&); void output(string, vector, vector >&, vector); - vector< vector > calculateEuclidianDistance(vector >&, int); - double calcPearson(vector >&, vector >&); }; diff --git a/sharedrabundfloatvector.cpp b/sharedrabundfloatvector.cpp index 52ffbb8..d65cc20 100644 --- a/sharedrabundfloatvector.cpp +++ b/sharedrabundfloatvector.cpp @@ -288,17 +288,21 @@ vector SharedRAbundFloatVector::getSharedRAbundFloatVe util = new SharedUtil(); util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups); - + + bool remove = false; for (int i = 0; i < lookup.size(); i++) { //if this sharedrabund is not from a group the user wants then delete it. if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { delete lookup[i]; lookup[i] = NULL; lookup.erase(lookup.begin()+i); i--; + remove = true; } } delete util; + + if (remove) { eliminateZeroOTUS(lookup); } return lookup; } @@ -409,6 +413,47 @@ OrderVector SharedRAbundFloatVector::getOrderVector(map* nameMap = N exit(1); } } - +//********************************************************************************************************************** +int SharedRAbundFloatVector::eliminateZeroOTUS(vector& thislookup) { + try { + + vector newLookup; + for (int i = 0; i < thislookup.size(); i++) { + SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); + temp->setLabel(thislookup[i]->getLabel()); + temp->setGroup(thislookup[i]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int i = 0; i < thislookup[0]->getNumBins(); i++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + + //look at each sharedRabund and make sure they are not all zero + bool allZero = true; + for (int j = 0; j < thislookup.size(); j++) { + if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } + } + + //if they are not all zero add this bin + if (!allZero) { + for (int j = 0; j < thislookup.size(); j++) { + newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); + } + } + } + + for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } + + thislookup = newLookup; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SharedRAbundFloatVector", "eliminateZeroOTUS"); + exit(1); + } +} /***********************************************************************/ diff --git a/sharedrabundfloatvector.h b/sharedrabundfloatvector.h index a3407f4..b0d8491 100644 --- a/sharedrabundfloatvector.h +++ b/sharedrabundfloatvector.h @@ -75,6 +75,8 @@ private: float numSeqs; string group; int index; + + int eliminateZeroOTUS(vector&); }; diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 9276b7b..a050ab3 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -70,8 +70,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), string holdLabel, nextLabel, groupN; individual newguy; - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - lookup.clear(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } lookup.clear(); if (globaldata->saveNextLabel == "") { f >> label; } else { label = globaldata->saveNextLabel; } @@ -372,10 +371,12 @@ vector SharedRAbundVector::getSharedRAbundVectors(){ util = new SharedUtil(); util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups); - + + bool remove = false; for (int i = 0; i < lookup.size(); i++) { //if this sharedrabund is not from a group the user wants then delete it. if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { + remove = true; delete lookup[i]; lookup[i] = NULL; lookup.erase(lookup.begin()+i); i--; @@ -383,6 +384,8 @@ vector SharedRAbundVector::getSharedRAbundVectors(){ } delete util; + + if (remove) { eliminateZeroOTUS(lookup); } return lookup; } @@ -391,6 +394,81 @@ vector SharedRAbundVector::getSharedRAbundVectors(){ exit(1); } } +//********************************************************************************************************************** +int SharedRAbundVector::eliminateZeroOTUS(vector& thislookup) { + try { + + vector newLookup; + for (int i = 0; i < thislookup.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thislookup[i]->getLabel()); + temp->setGroup(thislookup[i]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int i = 0; i < thislookup[0]->getNumBins(); i++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + + //look at each sharedRabund and make sure they are not all zero + bool allZero = true; + for (int j = 0; j < thislookup.size(); j++) { + if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } + } + + //if they are not all zero add this bin + if (!allZero) { + for (int j = 0; j < thislookup.size(); j++) { + newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); + } + } + } + + for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } + + thislookup = newLookup; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SharedRAbundVector", "eliminateZeroOTUS"); + exit(1); + } + } + +/***********************************************************************/ +vector SharedRAbundVector::getSharedRAbundFloatVectors(vector thislookup){ + try { + vector newLookupFloat; + for (int i = 0; i < lookup.size(); i++) { + SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); + temp->setLabel(thislookup[i]->getLabel()); + temp->setGroup(thislookup[i]->getGroup()); + newLookupFloat.push_back(temp); + } + + for (int i = 0; i < thislookup.size(); i++) { + + for (int j = 0; j < thislookup[i]->getNumBins(); j++) { + + if (m->control_pressed) { return newLookupFloat; } + + int abund = thislookup[i]->getAbundance(j); + + float relabund = abund / (float) thislookup[i]->getNumSeqs(); + + newLookupFloat[i]->push_back(relabund, thislookup[i]->getGroup()); + } + } + + return newLookupFloat; + } + catch(exception& e) { + m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors"); + exit(1); + } +} /***********************************************************************/ RAbundVector SharedRAbundVector::getRAbundVector() { diff --git a/sharedrabundvector.h b/sharedrabundvector.h index 6584d1d..beece8e 100644 --- a/sharedrabundvector.h +++ b/sharedrabundvector.h @@ -13,6 +13,7 @@ #include "datavector.hpp" #include "sharedordervector.h" #include "sharedsabundvector.h" +#include "sharedrabundfloatvector.h" #include "rabundvector.hpp" #include "groupmap.h" @@ -69,6 +70,7 @@ public: SharedSAbundVector getSharedSAbundVector(); SharedRAbundVector getSharedRAbundVector(); vector getSharedRAbundVectors(); + vector getSharedRAbundFloatVectors(vector); private: vector data; @@ -80,6 +82,8 @@ private: int numSeqs; string group; int index; + + int eliminateZeroOTUS(vector&); };