#include "corraxescommand.h"
#include "sharedutilities.h"
+#include "linearalgebra.h"
-//********************************************************************************************************************
-//sorts highes to lowest
-inline bool compareSpearman(spearmanRank left, spearmanRank right){
- return (left.score > right.score);
-}
//**********************************************************************************************************************
-vector<string> CorrAxesCommand::getValidParameters(){
+vector<string> CorrAxesCommand::setParameters(){
try {
- string Array[] = {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none","corraxes",false,true,true); parameters.push_back(paxes);
+ CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(pshared);
+ CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(prelabund);
+ CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false); parameters.push_back(pmetadata);
+ CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pnumaxes);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "","",false,false); parameters.push_back(pmethod);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
}
catch(exception& e) {
- m->errorOut(e, "CorrAxesCommand", "getValidParameters");
+ m->errorOut(e, "CorrAxesCommand", "setParameters");
exit(1);
}
}
//**********************************************************************************************************************
-vector<string> CorrAxesCommand::getRequiredParameters(){
+string CorrAxesCommand::getHelpString(){
try {
- string Array[] = {"axes"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
- return myArray;
+ string helpString = "";
+ helpString += "The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n";
+ helpString += "The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label. The shared, relabund or metadata and axes parameters are required. If shared is given the relative abundance is calculated.\n";
+ helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
+ helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
+ helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
+ helpString += "The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n";
+ helpString += "The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n";
+ helpString += "Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n";
+ helpString += "The corr.axes command outputs a .corr.axes file.\n";
+ helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
+ return helpString;
}
catch(exception& e) {
- m->errorOut(e, "CorrAxesCommand", "getRequiredParameters");
+ m->errorOut(e, "CorrAxesCommand", "getHelpString");
exit(1);
}
}
//**********************************************************************************************************************
-CorrAxesCommand::CorrAxesCommand(){
- try {
- abort = true;
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
- }
- catch(exception& e) {
- m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
- exit(1);
- }
+string CorrAxesCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "corraxes") { pattern = "[filename],[tag],corr.axes"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CorrAxesCommand", "getOutputPattern");
+ exit(1);
+ }
}
//**********************************************************************************************************************
-vector<string> CorrAxesCommand::getRequiredFiles(){
+CorrAxesCommand::CorrAxesCommand(){
try {
- vector<string> myArray;
- return myArray;
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
}
catch(exception& e) {
- m->errorOut(e, "CorrAxesCommand", "getRequiredFiles");
+ m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand");
exit(1);
}
}
//**********************************************************************************************************************
CorrAxesCommand::CorrAxesCommand(string option) {
try {
- abort = false;
- globaldata = GlobalData::getInstance();
+ abort = false; calledHelp = false;
//allow user to run help
- if(option == "help") { help(); abort = true; }
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
- //valid paramters for this command
- string Array[] = {"axes","shared","relabund","numaxes","label","groups","method","metadata","outputdir","inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ vector<string> myArray = setParameters();
OptionParser parser(option);
map<string, string> parameters = parser.getParameters();
}
vector<string> tempOutNames;
- outputTypes["corr.axes"] = tempOutNames;
+ outputTypes["corraxes"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
sharedfile = validParameter.validFile(parameters, "shared", true);
if (sharedfile == "not open") { abort = true; }
else if (sharedfile == "not found") { sharedfile = ""; }
- else { inputFileName = sharedfile; }
+ else { inputFileName = sharedfile; m->setSharedFile(sharedfile); }
relabundfile = validParameter.validFile(parameters, "relabund", true);
if (relabundfile == "not open") { abort = true; }
else if (relabundfile == "not found") { relabundfile = ""; }
- else { inputFileName = relabundfile; }
+ else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
metadatafile = validParameter.validFile(parameters, "metadata", true);
if (metadatafile == "not open") { abort = true; }
pickedGroups = true;
m->splitAtDash(groups, Groups);
}
- globaldata->Groups = Groups;
+ m->setGroups(Groups);
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputFileName); }
label = validParameter.validFile(parameters, "label", false);
if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
- if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true; }
+ if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) {
+ //is there are current file available for any of these?
+ //give priority to shared, then relabund
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ relabundfile = m->getRelAbundFile();
+ if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You must provide either a shared, relabund, or metadata file."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+ }
if (metadatafile != "") {
if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file."); m->mothurOutEndLine(); abort = true; }
}
string temp;
temp = validParameter.validFile(parameters, "numaxes", false); if (temp == "not found"){ temp = "3"; }
- convert(temp, numaxes);
+ m->mothurConvert(temp, numaxes);
method = validParameter.validFile(parameters, "method", false); if (method == "not found"){ method = "pearson"; }
}
//**********************************************************************************************************************
-void CorrAxesCommand::help(){
- try {
- m->mothurOut("The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n");
- m->mothurOut("The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label. The shared, relabund or metadata and axes parameters are required. If shared is given the relative abundance is calculated.\n");
- m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n");
- m->mothurOut("The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n");
- m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n");
- m->mothurOut("The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n");
- m->mothurOut("The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n");
- m->mothurOut("Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n");
- m->mothurOut("The corr.axes command outputs a .corr.axes file.\n");
- m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
- }
- catch(exception& e) {
- m->errorOut(e, "CorrAxesCommand", "help");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-CorrAxesCommand::~CorrAxesCommand(){}
-
-//**********************************************************************************************************************
-
int CorrAxesCommand::execute(){
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
/*************************************************************************************/
// use smart distancing to get right sharedRabund and convert to relabund if needed //
/*************************************************************************************/
// calc the r values //
/************************************************************************************/
-
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
- outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName));
+ variables["[tag]"] = method;
+ string outputFileName = getOutputFileName("corraxes", variables);
+ outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
if (metadatafile == "") { out << "OTU"; }
else { out << "Feature"; }
- for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1); }
+ for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1) << "\tp-value"; }
out << "\tlength" << endl;
if (method == "pearson") { calcPearson(axes, out); }
int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
try {
+ LinearAlgebra linear;
+
//find average of each axis - X
vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
- if (metadatafile == "") { out << i+1; }
+ if (metadatafile == "") { out << m->currentSharedBinLabels[i]; }
else { out << metadataLabels[i]; }
//find the averages this otu - Y
double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
r = numerator / denom;
+
+ if (isnan(r) || isinf(r)) { r = 0.0; }
+
rValues[k] = r;
out << '\t' << r;
+
+ double sig = linear.calcPearsonSig(lookupFloat.size(), r);
+
+ out << '\t' << sig;
}
double sum = 0;
int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
try {
- /*axes.clear();
- axes["1a"].push_back(1);
- axes["1b"].push_back(1);
- axes["1c"].push_back(1);
- axes["2a"].push_back(2);
- axes["2b"].push_back(2);
- axes["2c"].push_back(2);
- axes["3a"].push_back(3);
- axes["3b"].push_back(3);
- axes["3c"].push_back(3);
- axes["4a"].push_back(4);
- axes["4b"].push_back(4);
- axes["4c"].push_back(4);
- axes["5a"].push_back(5);
- axes["5b"].push_back(5);
- axes["5c"].push_back(5);*/
-
+ LinearAlgebra linear;
+ vector<double> sf;
+
//format data
vector< map<float, int> > tableX; tableX.resize(numaxes);
map<float, int>::iterator itTable;
vector<spearmanRank> ties;
int rankTotal = 0;
+ double sfTemp = 0.0;
for (int j = 0; j < scores[i].size(); j++) {
rankTotal += (j+1);
ties.push_back(scores[i][j]);
float thisrank = rankTotal / (float) ties.size();
rankAxes[ties[k].name].push_back(thisrank);
}
+ int t = ties.size();
+ sfTemp += (t*t*t-t);
ties.clear();
rankTotal = 0;
}
}
}
}
+ sf.push_back(sfTemp);
}
- /*cout << endl;
- lookupFloat.clear();
- lookupFloat.resize(15);
- for (int i = 0; i < lookupFloat.size(); i++) {
- lookupFloat[i] = new SharedRAbundFloatVector();
- }
- lookupFloat[0]->push_back(0.2288227, "1a"); lookupFloat[0]->setGroup("1a");
- lookupFloat[1]->push_back(0.7394062, "1b"); lookupFloat[1]->setGroup("1b");
- lookupFloat[2]->push_back(0.4521187, "1c"); lookupFloat[2]->setGroup("1c");
- lookupFloat[3]->push_back(0.1598630, "2a"); lookupFloat[3]->setGroup("2a");
- lookupFloat[4]->push_back(0.09588156, "2b"); lookupFloat[4]->setGroup("2b");
-
- lookupFloat[5]->push_back(0.933174, "2c"); lookupFloat[5]->setGroup("2c");
- lookupFloat[6]->push_back(0.3958304, "3a"); lookupFloat[6]->setGroup("3a");;
- lookupFloat[7]->push_back(0.2364419, "3b"); lookupFloat[7]->setGroup("3b");
- lookupFloat[8]->push_back(0.1697712, "3c"); lookupFloat[8]->setGroup("3c");
- lookupFloat[9]->push_back(0.4077173, "4a"); lookupFloat[9]->setGroup("4a");
-
- lookupFloat[10]->push_back(0.6116547, "4b"); lookupFloat[10]->setGroup("4b");
- lookupFloat[11]->push_back(0.9374322, "4c"); lookupFloat[11]->setGroup("4c");
- lookupFloat[12]->push_back(0.852184, "5a"); lookupFloat[12]->setGroup("5a");
- lookupFloat[13]->push_back(0.845094, "5b"); lookupFloat[13]->setGroup("5b");
- lookupFloat[14]->push_back(0.5795778, "5c"); lookupFloat[14]->setGroup("5c");*/
-
+
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
- if (metadatafile == "") { out << i+1; }
+ if (metadatafile == "") { out << m->currentSharedBinLabels[i]; }
else { out << metadataLabels[i]; }
//find the ranks of this otu - Y
sort(otuScores.begin(), otuScores.end(), compareSpearman);
+ double sg = 0.0;
map<string, float> rankOtus;
vector<spearmanRank> ties;
int rankTotal = 0;
float thisrank = rankTotal / (float) ties.size();
rankOtus[ties[k].name] = thisrank;
}
+ int t = ties.size();
+ sg += (t*t*t-t);
ties.clear();
rankTotal = 0;
}
p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
+ if (isnan(p) || isinf(p)) { p = 0.0; }
+
out << '\t' << p;
pValues[j] = p;
+
+ double sig = linear.calcSpearmanSig(n, sf[j], sg, di);
+ out << '\t' << sig;
+
}
double sum = 0;
int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
try {
+ LinearAlgebra linear;
+
//format data
vector< vector<spearmanRank> > scores; scores.resize(numaxes);
for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
- if (metadatafile == "") { out << i+1; }
+ if (metadatafile == "") { out << m->currentSharedBinLabels[i]; }
else { out << metadataLabels[i]; }
//find the ranks of this otu - Y
}
}
}
+
+
vector<double> pValues(numaxes);
//calc spearman ranks for each axis for this otu
for (int j = 0; j < numaxes; j++) {
- int P = 0;
- //assemble otus ranks in same order as axis ranks
+ int numCoor = 0;
+ int numDisCoor = 0;
+
vector<spearmanRank> otus;
+ vector<spearmanRank> otusTemp;
for (int l = 0; l < scores[j].size(); l++) {
spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
otus.push_back(member);
}
+ int count = 0;
for (int l = 0; l < scores[j].size(); l++) {
int numWithHigherRank = 0;
+ int numWithLowerRank = 0;
float thisrank = otus[l].score;
- for (int u = l; u < scores[j].size(); u++) {
+ for (int u = l+1; u < scores[j].size(); u++) {
if (otus[u].score > thisrank) { numWithHigherRank++; }
+ else if (otus[u].score < thisrank) { numWithLowerRank++; }
+ count++;
}
- P += numWithHigherRank;
+ numCoor += numWithHigherRank;
+ numDisCoor += numWithLowerRank;
}
- int n = lookupFloat.size();
-
- double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
-
+ double p = (numCoor - numDisCoor) / (float) count;
+ if (isnan(p) || isinf(p)) { p = 0.0; }
+
out << '\t' << p;
pValues[j] = p;
-
+
+ double sig = linear.calcKendallSig(scores[j].size(), p);
+
+ out << '\t' << sig;
}
double sum = 0;
}
//for each bin
+ vector<string> newBinLabels;
+ string snumBins = toString(thislookup[0]->getNumBins());
for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
for (int j = 0; j < thislookup.size(); j++) {
newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
}
+
+ //if there is a bin label use it otherwise make one
+ string binLabel = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
+
+ newBinLabels.push_back(binLabel);
}
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
vector<string> groupNames;
ifstream in;
- m->openInputFile(axesfile, in);
+ m->openInputFile(metadatafile, in);
string headerLine = m->getline(in); m->gobble(in);
- istringstream iss (headerLine,istringstream::in);
-
- //read the first label, because it refers to the groups
- string columnLabel;
- iss >> columnLabel; m->gobble(iss);
+ vector<string> pieces = m->splitWhiteSpace(headerLine);
//save names of columns you are reading
- while (!iss.eof()) {
- iss >> columnLabel; m->gobble(iss);
- metadataLabels.push_back(columnLabel);
+ for (int i = 1; i < pieces.size(); i++) {
+ metadataLabels.push_back(pieces[i]);
}
int count = metadataLabels.size();
-
+
//read rest of file
while (!in.eof()) {
string group = "";
in >> group; m->gobble(in);
groupNames.push_back(group);
-
+
SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
tempLookup->setGroup(group);
tempLookup->setLabel("1");
for (int i = 0; i < count; i++) {
float temp = 0.0;
in >> temp;
-
tempLookup->push_back(temp, group);
}
//remove any groups the user does not want, and set globaldata->groups with only valid groups
SharedUtil* util;
util = new SharedUtil();
-
- util->setGroups(globaldata->Groups, groupNames);
+ Groups = m->getGroups();
+ util->setGroups(Groups, groupNames);
+ m->setGroups(Groups);
for (int i = 0; i < lookupFloat.size(); i++) {
//if this sharedrabund is not from a group the user wants then delete it.
- if (util->isValidGroup(lookupFloat[i]->getGroup(), globaldata->Groups) == false) {
+ if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) {
delete lookupFloat[i]; lookupFloat[i] = NULL;
lookupFloat.erase(lookupFloat.begin()+i);
i--;