From: westcott Date: Wed, 9 Feb 2011 17:18:48 +0000 (+0000) Subject: added mantel command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=5553e33be3a45eee6bed2ac9a5c4ca0aa0e8d5e4 added mantel command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 9216cc1..49131e5 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -287,6 +287,7 @@ A7E9B98D12D37EC400DA6239 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87C12D37EC400DA6239 /* weighted.cpp */; }; A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; }; A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; }; + A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp */; }; A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC480D12D788F20055BC5C /* linearalgebra.cpp */; }; A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC486612D795D60055BC5C /* pcacommand.cpp */; }; /* End PBXBuildFile section */ @@ -884,6 +885,8 @@ A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87F12D37EC400DA6239 /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whittaker.cpp; sourceTree = ""; }; A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = ""; }; + A7FA10001302E096003860FE /* mantelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mantelcommand.h; sourceTree = ""; }; + A7FA10011302E096003860FE /* mantelcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mantelcommand.cpp; sourceTree = ""; }; A7FC480C12D788F20055BC5C /* linearalgebra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = linearalgebra.h; sourceTree = ""; }; A7FC480D12D788F20055BC5C /* linearalgebra.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = linearalgebra.cpp; sourceTree = ""; }; A7FC486512D795D60055BC5C /* pcacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcacommand.h; sourceTree = ""; }; @@ -1123,6 +1126,8 @@ A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */, A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */, A7E9B73E12D37EC400DA6239 /* listseqscommand.h */, + A7FA10001302E096003860FE /* mantelcommand.h */, + A7FA10011302E096003860FE /* mantelcommand.cpp */, A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */, A7E9B74412D37EC400DA6239 /* makegroupcommand.h */, A7E9B74912D37EC400DA6239 /* matrixoutputcommand.cpp */, @@ -1610,7 +1615,7 @@ attributes = { ORGANIZATIONNAME = "Schloss Lab"; }; - buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */; compatibilityVersion = "Xcode 3.1"; developmentRegion = English; hasScannedForEncodings = 1; @@ -1915,6 +1920,7 @@ 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */, A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */, A75790591301749D00A30DAB /* homovacommand.cpp in Sources */, + A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2030,7 +2036,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = { + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 1DEB928A08733DD80010E9CD /* Debug */, diff --git a/amovacommand.cpp b/amovacommand.cpp index 0a9b53b..e1a161d 100644 --- a/amovacommand.cpp +++ b/amovacommand.cpp @@ -318,6 +318,7 @@ AmovaCommand::AmovaCommand(string option) { void AmovaCommand::help(){ try { + m->mothurOut("Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n"); m->mothurOut("The amova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n"); m->mothurOut("The amova command outputs a .amova file. \n"); m->mothurOut("The amova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n"); diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 67b70f2..38c097f 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -273,6 +273,9 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found") { temp = "f"; } trim = m->isTrue(temp); + //temp = validParameter.validFile(parameters, "trimera", false); if (temp == "not found") { temp = "f"; } + //trimera = m->isTrue(temp); + search = validParameter.validFile(parameters, "search", false); if (search == "not found") { search = "distance"; } temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "100"; } @@ -309,6 +312,7 @@ void ChimeraSlayerCommand::help(){ m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n"); #endif m->mothurOut("The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n"); + //m->mothurOut("The trimera parameter allows you to check both peices of a chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n"); m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n"); m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n"); m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n"); @@ -619,6 +623,9 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f Sequence* trimmed = chimera->print(out, out2); if (trim) { trimmed->printSequence(out3); delete trimmed; } + + //do you want to check both pieces for chimeras + //if (trimera) {} } count++; } @@ -701,6 +708,9 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status); delete buf2; } + + //do you want to check both pieces for chimeras + //if (trimera) {} } } diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 0de118f..ff19337 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -48,7 +48,7 @@ private: int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); #endif - bool abort, realign, trim; + bool abort, realign, trim, trimera; string fastafile, templatefile, outputDir, search, namefile, includeAbunds; int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength; float divR; diff --git a/commandfactory.cpp b/commandfactory.cpp index 924e3f7..525d81b 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -111,6 +111,7 @@ #include "mergegroupscommand.h" #include "amovacommand.h" #include "homovacommand.h" +#include "mantelcommand.h" /*******************************************************/ @@ -224,6 +225,7 @@ CommandFactory::CommandFactory(){ commands["remove.rare"] = "remove.rare"; commands["amova"] = "amova"; commands["homova"] = "homova"; + commands["mantel"] = "mantel"; commands["merge.groups"] = "merge.groups"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; @@ -388,6 +390,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "merge.groups") { command = new MergeGroupsCommand(optionString); } else if(commandName == "amova") { command = new AmovaCommand(optionString); } else if(commandName == "homova") { command = new HomovaCommand(optionString); } + else if(commandName == "mantel") { command = new MantelCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -517,6 +520,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "merge.groups") { pipecommand = new MergeGroupsCommand(optionString); } else if(commandName == "amova") { pipecommand = new AmovaCommand(optionString); } else if(commandName == "homova") { pipecommand = new HomovaCommand(optionString); } + else if(commandName == "mantel") { pipecommand = new MantelCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -634,6 +638,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "merge.groups") { shellcommand = new MergeGroupsCommand(); } else if(commandName == "amova") { shellcommand = new AmovaCommand(); } else if(commandName == "homova") { shellcommand = new HomovaCommand(); } + else if(commandName == "mantel") { shellcommand = new MantelCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/corraxescommand.cpp b/corraxescommand.cpp index 58fb364..4f8dccb 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -10,16 +10,6 @@ #include "corraxescommand.h" #include "sharedutilities.h" -//******************************************************************************************************************** -//sorts highest to lowest -inline bool compareSpearman(spearmanRank left, spearmanRank right){ - return (left.score > right.score); -} -//******************************************************************************************************************** -//sorts lowest to highest -inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){ - return (left.score < right.score); -} //********************************************************************************************************************** vector CorrAxesCommand::getValidParameters(){ try { diff --git a/corraxescommand.h b/corraxescommand.h index 8547b0f..bbe87ab 100644 --- a/corraxescommand.h +++ b/corraxescommand.h @@ -14,14 +14,6 @@ #include "sharedrabundfloatvector.h" #include "inputdata.h" -/***************************************************************/ -struct spearmanRank { - string name; - float score; - - spearmanRank(string n, float s) : name(n), score(s) {} -}; -/***************************************************************/ class CorrAxesCommand : public Command { public: diff --git a/homovacommand.cpp b/homovacommand.cpp index 32a7765..a8e2d13 100644 --- a/homovacommand.cpp +++ b/homovacommand.cpp @@ -318,6 +318,7 @@ HomovaCommand::HomovaCommand(string option) { void HomovaCommand::help(){ try { + m->mothurOut("Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n"); m->mothurOut("The homova command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n"); m->mothurOut("The homova command outputs a .homova file. \n"); m->mothurOut("The homova command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n"); diff --git a/linearalgebra.cpp b/linearalgebra.cpp index a2ee221..9308627 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -379,10 +379,291 @@ double LinearAlgebra::calcPearson(vector< vector >& euclidDists, vector< } catch(exception& e) { - m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); + m->errorOut(e, "LinearAlgebra", "calcPearson"); exit(1); } } /*********************************************************************************************************************************/ +//assumes both matrices are square and the same size +double LinearAlgebra::calcSpearman(vector< vector >& euclidDists, vector< vector >& userDists){ + try { + double r; + + //format data + map tableX; + map::iterator itTable; + vector scores; + + for (int i = 0; i < euclidDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scores.size()), euclidDists[i][j]); + scores.push_back(member); + + //count number of repeats + itTable = tableX.find(euclidDists[i][j]); + if (itTable == tableX.end()) { + tableX[euclidDists[i][j]] = 1; + }else { + tableX[euclidDists[i][j]]++; + } + } + } + + //sort scores + sort(scores.begin(), scores.end(), compareSpearman); + + //calc LX + double Lx = 0.0; + for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) { + double tx = (double) itTable->second; + Lx += ((pow(tx, 3.0) - tx) / 12.0); + } + + //find ranks of xi + map rankEuclid; + vector ties; + int rankTotal = 0; + for (int j = 0; j < scores.size(); j++) { + rankTotal += (j+1); + ties.push_back(scores[j]); + + if (j != (scores.size()-1)) { // you are not the last so you can look ahead + if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + } + } + + + //format data + map tableY; + scores.clear(); + + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scores.size()), userDists[i][j]); + scores.push_back(member); + + //count number of repeats + itTable = tableY.find(userDists[i][j]); + if (itTable == tableY.end()) { + tableY[userDists[i][j]] = 1; + }else { + tableY[userDists[i][j]]++; + } + } + } + + //sort scores + sort(scores.begin(), scores.end(), compareSpearman); + + //calc LX + double Ly = 0.0; + for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) { + double ty = (double) itTable->second; + Ly += ((pow(ty, 3.0) - ty) / 12.0); + } + + //find ranks of yi + map rankUser; + ties.clear(); + rankTotal = 0; + for (int j = 0; j < scores.size(); j++) { + rankTotal += (j+1); + ties.push_back(scores[j]); + + if (j != (scores.size()-1)) { // you are not the last so you can look ahead + if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + } + } + + + double di = 0.0; + int count = 0; + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < i; j++) { + + float xi = rankEuclid[toString(count)]; + float yi = rankUser[toString(count)]; + + di += ((xi - yi) * (xi - yi)); + + count++; + } + } + + double n = (double) count; + + double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx; + double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly; + + r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2))); + + //divide by zero error + if (isnan(r) || isinf(r)) { r = 0.0; } + + return r; + + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcSpearman"); + exit(1); + } +} + +/*********************************************************************************************************************************/ +//assumes both matrices are square and the same size +double LinearAlgebra::calcKendall(vector< vector >& euclidDists, vector< vector >& userDists){ + try { + double r; + + //format data + vector scores; + for (int i = 0; i < euclidDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scores.size()), euclidDists[i][j]); + scores.push_back(member); + } + } + + //sort scores + sort(scores.begin(), scores.end(), compareSpearman); + + //find ranks of xi + map rankEuclid; + vector ties; + int rankTotal = 0; + for (int j = 0; j < scores.size(); j++) { + rankTotal += (j+1); + ties.push_back(scores[j]); + + if (j != (scores.size()-1)) { // you are not the last so you can look ahead + if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + } + } + + vector scoresUser; + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scoresUser.size()), userDists[i][j]); + scoresUser.push_back(member); + } + } + + //sort scores + sort(scoresUser.begin(), scoresUser.end(), compareSpearman); + + //find ranks of yi + map rankUser; + ties.clear(); + rankTotal = 0; + for (int j = 0; j < scoresUser.size(); j++) { + rankTotal += (j+1); + ties.push_back(scoresUser[j]); + + if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead + if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + } + } + + int numCoor = 0; + int numDisCoor = 0; + + //order user ranks + vector user; + for (int l = 0; l < scores.size(); l++) { + spearmanRank member(scores[l].name, rankUser[scores[l].name]); + user.push_back(member); + } + + int count = 0; + for (int l = 0; l < scores.size(); l++) { + + int numWithHigherRank = 0; + int numWithLowerRank = 0; + float thisrank = user[l].score; + + for (int u = l; u < scores.size(); u++) { + if (user[u].score > thisrank) { numWithHigherRank++; } + else if (user[u].score < thisrank) { numWithLowerRank++; } + count++; + } + + numCoor += numWithHigherRank; + numDisCoor += numWithLowerRank; + } + + //comparing to yourself + count -= userDists.size(); + + r = (numCoor - numDisCoor) / (float) count; + + //divide by zero error + if (isnan(r) || isinf(r)) { r = 0.0; } + + return r; + + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcKendall"); + exit(1); + } +} + +/*********************************************************************************************************************************/ diff --git a/linearalgebra.h b/linearalgebra.h index 70f7699..e1f6f7c 100644 --- a/linearalgebra.h +++ b/linearalgebra.h @@ -12,6 +12,7 @@ #include "mothurout.h" + class LinearAlgebra { public: @@ -24,6 +25,8 @@ public: vector< vector > calculateEuclidianDistance(vector >&, int); //pass in axes and number of dimensions vector< vector > calculateEuclidianDistance(vector >&); //pass in axes double calcPearson(vector >&, vector >&); + double calcSpearman(vector >&, vector >&); + double calcKendall(vector >&, vector >&); private: MothurOut* m; diff --git a/mantelcommand.cpp b/mantelcommand.cpp new file mode 100644 index 0000000..0f6d0d7 --- /dev/null +++ b/mantelcommand.cpp @@ -0,0 +1,322 @@ +/* + * mantelcommand.cpp + * mothur + * + * Created by westcott on 2/9/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "mantelcommand.h" +#include "readphylipvector.h" + +//********************************************************************************************************************** +vector MantelCommand::getValidParameters(){ + try { + string Array[] = {"phylip1","phylip2","method","iters","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector MantelCommand::getRequiredParameters(){ + try { + string Array[] = {"phylip1", "phylip2"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +MantelCommand::MantelCommand(){ + try { + abort = true; calledHelp = true; + vector tempOutNames; + outputTypes["mantel"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "MantelCommand"); + exit(1); + } +} + +//********************************************************************************************************************** +vector MantelCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** +MantelCommand::MantelCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + + else { + //valid paramters for this command + string Array[] = {"phylip1","phylip2","method","iters","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + vector tempOutNames; + outputTypes["mantel"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip1"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip1"] = inputDir + it->second; } + } + + it = parameters.find("phylip2"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip2"] = inputDir + it->second; } + } + } + + + //check for required parameters + phylipfile1 = validParameter.validFile(parameters, "phylip1", true); + if (phylipfile1 == "not open") { phylipfile1 = ""; abort = true; } + else if (phylipfile1 == "not found") { phylipfile1 = ""; m->mothurOut("phylip1 is a required parameter for the mantel command."); m->mothurOutEndLine(); abort = true; } + + phylipfile2 = validParameter.validFile(parameters, "phylip2", true); + if (phylipfile2 == "not open") { phylipfile2 = ""; abort = true; } + else if (phylipfile2 == "not found") { phylipfile2 = ""; m->mothurOut("phylip2 is a required parameter for the mantel command."); m->mothurOutEndLine(); abort = true; } + + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(phylipfile1); } + + method = validParameter.validFile(parameters, "method", false); if (method == "not found"){ method = "pearson"; } + + string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, iters); + + if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall."); m->mothurOutEndLine(); abort = true; } + } + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "MantelCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void MantelCommand::help(){ + try { + m->mothurOut("The mantel command reads two distance matrices and calculates the mantel correlation coefficient.\n"); + m->mothurOut("The mantel command parameters are phylip1, phylip2 and method. The phylip1 and phylip2 parameters are required. Matrices must be the same size and contain the same names.\n"); + m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n"); + m->mothurOut("The mantel command should be in the following format: mantel(phylip1=veg.dist, phylip2=env.dist).\n"); + m->mothurOut("The mantel command outputs a .mantel file.\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. phylip1), '=' and parameters (i.e. veg.dist).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +MantelCommand::~MantelCommand(){} + +//********************************************************************************************************************** + +int MantelCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + /***************************************************/ + // reading distance files // + /***************************************************/ + + //read phylip1 + ReadPhylipVector readMatrix(phylipfile1); + vector< vector > matrix1; + vector names1 = readMatrix.read(matrix1); + + if (m->control_pressed) { return 0; } + + //read phylip2 + ReadPhylipVector readMatrix2(phylipfile2); + vector temp; //seqDist - int, int, float + vector names2 = readMatrix2.read(temp); + + if (m->control_pressed) { return 0; } + + //fill matrix2 making sure to make sure the distances are in the same order as matrix1 + vector< vector > matrix2; + if (names1 == names2) { //then everything is in same order and same size + + //initialize space + matrix2.resize(names2.size()); + for (int i = 0; i < matrix2.size(); i++) { matrix2[i].resize(names2.size(), 0.0); } + + //fill matrix2 + for (int i = 0; i < temp.size(); i++) { + matrix2[temp[i].seq1][temp[i].seq2] = temp[i].dist; + matrix2[temp[i].seq2][temp[i].seq1] = temp[i].dist; + } + + }else if (names1.size() != names2.size()) { //wrong size no need to order, abort + m->mothurOut("[ERROR]: distance matrices are not the same size, aborting."); m->mothurOutEndLine(); + m->control_pressed = true; + }else { //sizes are the same, but either the names are different or they are in different order + + //map location of name in names1 to location of name in names2 + map names1Map; + map::iterator it; + for (int i = 0; i < names1.size(); i++) { names1Map[names1[i]] = i; } + + map names2Map; + bool nameError = false; + for (int i = 0; i < names2.size(); i++) { + + //if you find one name error stop looking + if (!nameError) { + it = names1Map.find(names2[i]); + if (it == names1Map.end()) { nameError = true; } + } + + //are names different + names2Map[names2[i]] = i; + } + + //initialize space + matrix2.resize(names2.size()); + for (int i = 0; i < matrix2.size(); i++) { matrix2[i].resize(names2.size(), 0.0); } + + //fill matrix2 + //are we comparing apples to apples? + if (nameError) { + m->mothurOut("[WARNING]: Names do not match between distance files. Comparing based on order in files."); m->mothurOutEndLine(); + + for (int i = 0; i < temp.size(); i++) { + matrix2[temp[i].seq1][temp[i].seq2] = temp[i].dist; + matrix2[temp[i].seq2][temp[i].seq1] = temp[i].dist; + } + + }else { //no name error just different orders, so reorder + + for (int i = 0; i < temp.size(); i++) { + + //what's the location of this distance comparison in matrix1 + string matrix2NameI = names2[temp[i].seq1]; + string matrix2NameJ = names2[temp[i].seq2]; + int locationI = names1Map[matrix2NameI]; + int locationJ = names1Map[matrix2NameJ]; + + matrix2[locationI][locationJ] = temp[i].dist; + matrix2[locationJ][locationI] = temp[i].dist; + } + } + + } + + //frees up space + temp.clear(); + + if (m->control_pressed) { return 0; } + + /***************************************************/ + // calculating mantel and signifigance // + /***************************************************/ + + //calc mantel coefficient + LinearAlgebra linear; + double mantel = 0.0; + if (method == "pearson") { mantel = linear.calcPearson(matrix1, matrix2); } + else if (method == "spearman") { mantel = linear.calcSpearman(matrix1, matrix2); } + else if (method == "kendall") { mantel = linear.calcKendall(matrix1, matrix2); } + + + //calc signifigance + int count = 0; + for (int i = 0; i < iters; i++) { + + if (m->control_pressed) { return 0; } + + //randomize matrix2 + vector< vector > matrix2Copy = matrix2; + random_shuffle(matrix2Copy.begin(), matrix2Copy.end()); + + //calc random mantel + double randomMantel = 0.0; + if (method == "pearson") { randomMantel = linear.calcPearson(matrix1, matrix2Copy); } + else if (method == "spearman") { randomMantel = linear.calcSpearman(matrix1, matrix2Copy); } + else if (method == "kendall") { randomMantel = linear.calcKendall(matrix1, matrix2Copy); } + + if (randomMantel >= mantel) { count++; } + } + + double pValue = count / (float) iters; + + if (m->control_pressed) { return 0; } + + string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile1)) + "mantel"; + outputNames.push_back(outputFile); outputTypes["mantel"].push_back(outputFile); + ofstream out; + + m->openOutputFile(outputFile, out); + + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); + + out << "Mantel\tpValue" << endl; + out << mantel << '\t' << pValue << endl; + + out.close(); + + cout << "\nmantel = " << mantel << "\tpValue = " << pValue << endl; + m->mothurOutJustToLog("\nmantel = " + toString(mantel) + "\tpValue = " + toString(pValue) + "\n"); + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MantelCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** + + diff --git a/mantelcommand.h b/mantelcommand.h new file mode 100644 index 0000000..0646b29 --- /dev/null +++ b/mantelcommand.h @@ -0,0 +1,42 @@ +#ifndef MANTELCOMMAND_H +#define MANTELCOMMAND_H + +/* + * mantelcommand.h + * mothur + * + * Created by westcott on 2/9/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "linearalgebra.h" + +class MantelCommand : public Command { +public: + MantelCommand(string); + MantelCommand(); + ~MantelCommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + + string phylipfile1, phylipfile2, outputDir, method; + bool abort; + int iters; + + vector outputNames; + map > outputTypes; +}; + + +#endif + + + diff --git a/mothur.h b/mothur.h index b0ad51e..6e5833f 100644 --- a/mothur.h +++ b/mothur.h @@ -124,6 +124,23 @@ struct distlinePair { int end; }; +/***************************************************************/ +struct spearmanRank { + string name; + float score; + + spearmanRank(string n, float s) : name(n), score(s) {} +}; +//******************************************************************************************************************** +//sorts highest to lowest +inline bool compareSpearman(spearmanRank left, spearmanRank right){ + return (left.score > right.score); +} +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){ + return (left.score < right.score); +} /************************************************************/ //sorts lowest to highest inline bool compareDistLinePairs(distlinePair left, distlinePair right){