From d1c97b8c04bb75faca1e76ffad60b37a4d789d3d Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Wed, 2 Oct 2013 07:23:27 -0400 Subject: [PATCH] changes while testing --- Mothur.xcodeproj/project.pbxproj | 16 +- chao1.cpp | 2 +- classifyrfsharedcommand.cpp | 8 +- classifyrfsharedcommand.h | 4 +- classifysharedcommand.cpp | 407 ------------------------------- classifysharedcommand.h | 54 ---- commandfactory.cpp | 8 +- kruskalwalliscommand.cpp | 14 +- kruskalwalliscommand.h | 2 +- lefsecommand.cpp | 45 ++-- lefsecommand.h | 4 +- linearalgebra.cpp | 27 +- makebiomcommand.cpp | 55 ++++- makebiomcommand.h | 6 +- makefile | 4 +- makelefsecommand.cpp | 4 +- renameseqscommand.cpp | 2 +- screenseqscommand.cpp | 5 +- setlogfilecommand.cpp | 2 +- sharedjest.cpp | 6 + 20 files changed, 120 insertions(+), 555 deletions(-) delete mode 100755 classifysharedcommand.cpp delete mode 100755 classifysharedcommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 8304f07..4fb82f5 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -31,7 +31,6 @@ A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; }; A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; }; A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; }; - A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C211619CCE600651424 /* classifysharedcommand.cpp */; }; A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; }; A7386C29161A110800651424 /* decisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C28161A110700651424 /* decisiontree.cpp */; }; A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */; }; @@ -346,6 +345,7 @@ A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; }; A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; }; A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; }; + A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F24FC117EA365F0021DC9A /* classifyrfsharedcommand.cpp */; }; A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */; }; A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp */; }; A7FA2AC714A0E881007C09A6 /* bsplvb.f in Sources */ = {isa = PBXBuildFile; fileRef = A7FA2ABC14A0E881007C09A6 /* bsplvb.f */; }; @@ -448,8 +448,6 @@ A7386C1E1619CACB00651424 /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = ""; }; A7386C1F1619CACB00651424 /* randomforest.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = randomforest.hpp; sourceTree = ""; }; A7386C201619CACB00651424 /* rftreenode.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = rftreenode.hpp; sourceTree = ""; }; - A7386C211619CCE600651424 /* classifysharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifysharedcommand.cpp; sourceTree = ""; }; - A7386C221619CCE600651424 /* classifysharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifysharedcommand.h; sourceTree = ""; }; A7386C241619E52200651424 /* abstractdecisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = abstractdecisiontree.cpp; sourceTree = ""; }; A7386C28161A110700651424 /* decisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = decisiontree.cpp; sourceTree = ""; }; A73901051588C3EF00ED2ED6 /* loadlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = loadlogfilecommand.h; sourceTree = ""; }; @@ -1096,6 +1094,8 @@ A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = ""; }; A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifytreecommand.cpp; sourceTree = ""; }; A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifytreecommand.h; sourceTree = ""; }; + A7F24FC117EA365F0021DC9A /* classifyrfsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyrfsharedcommand.cpp; sourceTree = ""; }; + A7F24FC217EA365F0021DC9A /* classifyrfsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyrfsharedcommand.h; sourceTree = ""; }; A7F9F5CD141A5E500032F693 /* sequenceparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequenceparser.h; sourceTree = ""; }; A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequenceparser.cpp; sourceTree = ""; }; A7FA10001302E096003860FE /* mantelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mantelcommand.h; sourceTree = ""; }; @@ -1351,8 +1351,8 @@ A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */, A7E9B69312D37EC400DA6239 /* classifyseqscommand.h */, A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */, - A7386C221619CCE600651424 /* classifysharedcommand.h */, - A7386C211619CCE600651424 /* classifysharedcommand.cpp */, + A7F24FC217EA365F0021DC9A /* classifyrfsharedcommand.h */, + A7F24FC117EA365F0021DC9A /* classifyrfsharedcommand.cpp */, A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */, A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */, A7E9B69712D37EC400DA6239 /* clearcutcommand.h */, @@ -2364,7 +2364,6 @@ A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */, A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */, A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */, - A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */, A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */, A7386C29161A110800651424 /* decisiontree.cpp in Sources */, A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */, @@ -2394,6 +2393,7 @@ A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */, A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */, A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */, + A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2483,8 +2483,8 @@ GCC_MODEL_TUNING = ""; GCC_OPTIMIZATION_LEVEL = s; GCC_PREPROCESSOR_DEFINITIONS = ( - "VERSION=\"\\\"1.31.0\\\"\"", - "RELEASE_DATE=\"\\\"5/24/2013\\\"\"", + "VERSION=\"\\\"1.32.0\\\"\"", + "RELEASE_DATE=\"\\\"10/01/2013\\\"\"", ); GCC_VERSION = ""; GCC_WARN_ABOUT_MISSING_NEWLINE = YES; diff --git a/chao1.cpp b/chao1.cpp index ad1faa2..d7ec7ea 100644 --- a/chao1.cpp +++ b/chao1.cpp @@ -29,7 +29,7 @@ EstOutput Chao1::getValues(SAbundVector* rank){ }else{ doubles = 0.0; } double chaovar = 0.0000; -//cout << "singles = " << singles << " doubles = " << doubles << " sobs = " << sobs << endl; +//cout << "singles = " << singles << " doubles = " << doubles << " sobs = " << sobs << endl; double chao = sobs + singles*(singles-1)/(2*(doubles+1)); if(singles > 0 && doubles > 0){ diff --git a/classifyrfsharedcommand.cpp b/classifyrfsharedcommand.cpp index d2cd9f9..efee5e3 100755 --- a/classifyrfsharedcommand.cpp +++ b/classifyrfsharedcommand.cpp @@ -47,12 +47,12 @@ vector ClassifyRFSharedCommand::setParameters(){ string ClassifyRFSharedCommand::getHelpString(){ try { string helpString = ""; - helpString += "The classify.shared command allows you to ....\n"; - helpString += "The classify.shared command parameters are: shared, design, label, groups, otupersplit.\n"; + helpString += "The classify.rf command allows you to ....\n"; + helpString += "The classify.rf command parameters are: shared, design, label, groups, otupersplit.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups in your designfile you would like analyzed.\n"; - helpString += "The classify.shared should be in the following format: \n"; - helpString += "classify.shared(shared=yourSharedFile, design=yourDesignFile)\n"; + helpString += "The classify.rf should be in the following format: \n"; + helpString += "classify.rf(shared=yourSharedFile, design=yourDesignFile)\n"; return helpString; } catch(exception& e) { diff --git a/classifyrfsharedcommand.h b/classifyrfsharedcommand.h index 6a948b2..7b6ca13 100755 --- a/classifyrfsharedcommand.h +++ b/classifyrfsharedcommand.h @@ -19,11 +19,11 @@ public: ~ClassifyRFSharedCommand() {}; vector setParameters(); - string getCommandName() { return "classifyrf.shared"; } + string getCommandName() { return "classify.rf"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); - string getCitation() { return "http://www.mothur.org/wiki/Classifyrf.shared\n"; } + string getCitation() { return "http://www.mothur.org/wiki/Classify.rf\n"; } string getDescription() { return "implements the random forest machine learning algorithm to identify OTUs that can be used to differentiate between various groups of samples"; } int execute(); diff --git a/classifysharedcommand.cpp b/classifysharedcommand.cpp deleted file mode 100755 index c7eb6cd..0000000 --- a/classifysharedcommand.cpp +++ /dev/null @@ -1,407 +0,0 @@ -// -// classifysharedcommand.cpp -// Mothur -// -// Created by Abu Zaher Md. Faridee on 8/13/12. -// Copyright (c) 2012 Schloss Lab. All rights reserved. -// - -#include "classifysharedcommand.h" -#include "randomforest.hpp" -#include "decisiontree.hpp" -#include "rftreenode.hpp" - -//********************************************************************************************************************** -vector ClassifySharedCommand::setParameters(){ - try { - //CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); - CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); - CommandParameter potupersplit("otupersplit", "Multiple", "log2-squareroot", "log2", "", "", "","",false,false); parameters.push_back(potupersplit); - CommandParameter psplitcriteria("splitcriteria", "Multiple", "gainratio-infogain", "gainratio", "", "", "","",false,false); parameters.push_back(psplitcriteria); - CommandParameter pnumtrees("numtrees", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pnumtrees); - - // parameters related to pruning - CommandParameter pdopruning("prune", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdopruning); - CommandParameter ppruneaggrns("pruneaggressiveness", "Number", "", "0.9", "", "", "", "", false, false); parameters.push_back(ppruneaggrns); - CommandParameter pdiscardhetrees("discarderrortrees", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdiscardhetrees); - CommandParameter phetdiscardthreshold("errorthreshold", "Number", "", "0.4", "", "", "", "", false, false); parameters.push_back(phetdiscardthreshold); - CommandParameter psdthreshold("stdthreshold", "Number", "", "0.0", "", "", "", "", false, false); parameters.push_back(psdthreshold); - // pruning params end - - CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); - CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); - - vector myArray; - for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } - return myArray; - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "setParameters"); - exit(1); - } -} -//********************************************************************************************************************** -string ClassifySharedCommand::getHelpString(){ - try { - string helpString = ""; - helpString += "The classify.shared command allows you to ....\n"; - helpString += "The classify.shared command parameters are: shared, design, label, groups, otupersplit.\n"; - helpString += "The label parameter is used to analyze specific labels in your input.\n"; - helpString += "The groups parameter allows you to specify which of the groups in your designfile you would like analyzed.\n"; - helpString += "The classify.shared should be in the following format: \n"; - helpString += "classify.shared(shared=yourSharedFile, design=yourDesignFile)\n"; - return helpString; - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "getHelpString"); - exit(1); - } -} -//********************************************************************************************************************** -string ClassifySharedCommand::getOutputPattern(string type) { - try { - string pattern = ""; - - if (type == "summary") { pattern = "[filename],[distance],summary"; } //makes file like: amazon.0.03.fasta - else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } - - return pattern; - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "getOutputPattern"); - exit(1); - } -} -//********************************************************************************************************************** - -ClassifySharedCommand::ClassifySharedCommand() { - try { - abort = true; calledHelp = true; - setParameters(); - vector tempOutNames; - outputTypes["summary"] = tempOutNames; - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "ClassifySharedCommand"); - exit(1); - } -} - -//********************************************************************************************************************** -ClassifySharedCommand::ClassifySharedCommand(string option) { - try { - abort = false; calledHelp = false; - allLines = 1; - - //allow user to run help - if(option == "help") { help(); abort = true; calledHelp = true; } - else if(option == "citation") { citation(); abort = true; calledHelp = true;} - - else { - //valid paramters for this command - vector myArray = setParameters(); - - OptionParser parser(option); - map parameters = parser.getParameters(); - - ValidParameters validParameter; - map::iterator it; - //check to make sure all parameters are valid for command - for (it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } - } - vector tempOutNames; - outputTypes["summary"] = tempOutNames; - - //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); - if (inputDir == "not found"){ inputDir = ""; } - else { - string path; - it = parameters.find("shared"); - //user has given a shared file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["shared"] = inputDir + it->second; } - } - - it = parameters.find("design"); - //user has given a design file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["design"] = inputDir + it->second; } - } - - } - //check for parameters - //get shared file, it is required - sharedfile = validParameter.validFile(parameters, "shared", true); - if (sharedfile == "not open") { sharedfile = ""; abort = true; } - else if (sharedfile == "not found") { - //if there is a current shared file, use it - sharedfile = m->getSharedFile(); - if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } - }else { m->setSharedFile(sharedfile); } - - //get design file, it is required - designfile = validParameter.validFile(parameters, "design", true); - if (designfile == "not open") { sharedfile = ""; abort = true; } - else if (designfile == "not found") { - //if there is a current shared file, use it - designfile = m->getDesignFile(); - if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; } - }else { m->setDesignFile(designfile); } - - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it - } - - // NEW CODE for OTU per split selection criteria - string temp = validParameter.validFile(parameters, "splitcriteria", false); - if (temp == "not found") { temp = "gainratio"; } - if ((temp == "gainratio") || (temp == "infogain")) { - treeSplitCriterion = temp; - } else { m->mothurOut("Not a valid tree splitting criterio. Valid tree splitting criteria are 'gainratio' and 'infogain'."); - m->mothurOutEndLine(); - abort = true; - } - - temp = validParameter.validFile(parameters, "numtrees", false); if (temp == "not found"){ temp = "100"; } - m->mothurConvert(temp, numDecisionTrees); - - // parameters for pruning - temp = validParameter.validFile(parameters, "prune", false); - if (temp == "not found") { temp = "f"; } - doPruning = m->isTrue(temp); - - temp = validParameter.validFile(parameters, "pruneaggressiveness", false); - if (temp == "not found") { temp = "0.9"; } - m->mothurConvert(temp, pruneAggressiveness); - - temp = validParameter.validFile(parameters, "discarderrortrees", false); - if (temp == "not found") { temp = "f"; } - discardHighErrorTrees = m->isTrue(temp); - - temp = validParameter.validFile(parameters, "errorthreshold", false); - if (temp == "not found") { temp = "0.4"; } - m->mothurConvert(temp, highErrorTreeDiscardThreshold); - - temp = validParameter.validFile(parameters, "otupersplit", false); - if (temp == "not found") { temp = "log2"; } - if ((temp == "squareroot") || (temp == "log2")) { - optimumFeatureSubsetSelectionCriteria = temp; - } else { m->mothurOut("Not a valid OTU per split selection method. Valid OTU per split selection methods are 'log2' and 'squareroot'."); - m->mothurOutEndLine(); - abort = true; - } - - temp = validParameter.validFile(parameters, "stdthreshold", false); - if (temp == "not found") { temp = "0.0"; } - m->mothurConvert(temp, featureStandardDeviationThreshold); - - // end of pruning params - - //Groups must be checked later to make sure they are valid. SharedUtilities has functions of check the validity, just make to so m->setGroups() after the checks. If you are using these with a shared file no need to check the SharedRAbundVector class will call SharedUtilites for you, kinda nice, huh? - string groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; } - else { m->splitAtDash(groups, Groups); } - m->setGroups(Groups); - - //Commonly used to process list, rabund, sabund, shared and relabund files. Look at "smart distancing" examples below in the execute function. - string label = validParameter.validFile(parameters, "label", false); - if (label == "not found") { label = ""; } - else { - if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } - else { allLines = 1; } - } - } - - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "ClassifySharedCommand"); - exit(1); - } -} -//********************************************************************************************************************** -int ClassifySharedCommand::execute() { - try { - - if (abort == true) { if (calledHelp) { return 0; } return 2; } - - InputData input(sharedfile, "sharedfile"); - vector lookup = input.getSharedRAbundVectors(); - - //read design file - designMap.readDesignMap(designfile); - - string lastLabel = lookup[0]->getLabel(); - set processedLabels; - set userLabels = labels; - - //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - - if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } - - if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ - - m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - - processSharedAndDesignData(lookup); - - processedLabels.insert(lookup[0]->getLabel()); - userLabels.erase(lookup[0]->getLabel()); - } - - if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - string saveLabel = lookup[0]->getLabel(); - - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - lookup = input.getSharedRAbundVectors(lastLabel); - m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - processSharedAndDesignData(lookup); - - processedLabels.insert(lookup[0]->getLabel()); - userLabels.erase(lookup[0]->getLabel()); - - //restore real lastlabel to save below - lookup[0]->setLabel(saveLabel); - } - - lastLabel = lookup[0]->getLabel(); - //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - - if (m->control_pressed) { return 0; } - - //get next line to process - lookup = input.getSharedRAbundVectors(); - } - - if (m->control_pressed) { return 0; } - - //output error messages about any remaining user labels - set::iterator it; - bool needToRun = false; - for (it = userLabels.begin(); it != userLabels.end(); it++) { - m->mothurOut("Your file does not include the label " + *it); - if (processedLabels.count(lastLabel) != 1) { - m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); - needToRun = true; - }else { - m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); - } - } - - //run last label if you need to - if (needToRun == true) { - for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } - lookup = input.getSharedRAbundVectors(lastLabel); - - m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - - processSharedAndDesignData(lookup); - - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - - } - - m->mothurOutEndLine(); - m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } - m->mothurOutEndLine(); - - return 0; - - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "execute"); - exit(1); - } -} -//********************************************************************************************************************** - -void ClassifySharedCommand::processSharedAndDesignData(vector lookup){ - try { -// for (int i = 0; i < designMap->getNamesOfGroups().size(); i++) { -// string groupName = designMap->getNamesOfGroups()[i]; -// cout << groupName << endl; -// } - -// for (int i = 0; i < designMap->getNumSeqs(); i++) { -// string sharedGroupName = designMap->getNamesSeqs()[i]; -// string treatmentName = designMap->getGroup(sharedGroupName); -// cout << sharedGroupName << " : " << treatmentName << endl; -// } - - map treatmentToIntMap; - map intToTreatmentMap; - for (int i = 0; i < designMap.getNumGroups(); i++) { - string treatmentName = designMap.getNamesOfGroups()[i]; - treatmentToIntMap[treatmentName] = i; - intToTreatmentMap[i] = treatmentName; - } - - int numSamples = lookup.size(); - int numFeatures = lookup[0]->getNumBins(); - - int numRows = numSamples; - int numColumns = numFeatures + 1; // extra one space needed for the treatment/outcome - - vector< vector > dataSet(numRows, vector(numColumns, 0)); - - vector names; - - for (int i = 0; i < lookup.size(); i++) { - string sharedGroupName = lookup[i]->getGroup(); - names.push_back(sharedGroupName); - string treatmentName = designMap.getGroup(sharedGroupName); - - int j = 0; - for (; j < lookup[i]->getNumBins(); j++) { - int otuCount = lookup[i]->getAbundance(j); - dataSet[i][j] = otuCount; - } - dataSet[i][j] = treatmentToIntMap[treatmentName]; - } - - RandomForest randomForest(dataSet, numDecisionTrees, treeSplitCriterion, doPruning, pruneAggressiveness, discardHighErrorTrees, highErrorTreeDiscardThreshold, optimumFeatureSubsetSelectionCriteria, featureStandardDeviationThreshold); - - randomForest.populateDecisionTrees(); - randomForest.calcForrestErrorRate(); - randomForest.printConfusionMatrix(intToTreatmentMap); - - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "RF."; - variables["[distance]"] = lookup[0]->getLabel(); - string filename = getOutputFileName("summary", variables); - outputNames.push_back(filename); outputTypes["summary"].push_back(filename); - randomForest.calcForrestVariableImportance(filename); - - // - map variable; - variable["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "misclassifications."; - variable["[distance]"] = lookup[0]->getLabel(); - string mc_filename = getOutputFileName("summary", variable); - outputNames.push_back(mc_filename); outputTypes["summary"].push_back(mc_filename); - randomForest.getMissclassifications(mc_filename, intToTreatmentMap, names); - // - - m->mothurOutEndLine(); - } - catch(exception& e) { - m->errorOut(e, "ClassifySharedCommand", "processSharedAndDesignData"); - exit(1); - } -} -//********************************************************************************************************************** - diff --git a/classifysharedcommand.h b/classifysharedcommand.h deleted file mode 100755 index 7a6907f..0000000 --- a/classifysharedcommand.h +++ /dev/null @@ -1,54 +0,0 @@ -// -// classifysharedcommand.h -// Mothur -// -// Created by Abu Zaher Md. Faridee on 8/13/12. -// Copyright (c) 2012 Schloss Lab. All rights reserved. -// - -#ifndef __Mothur__classifysharedcommand__ -#define __Mothur__classifysharedcommand__ - -#include "command.hpp" -#include "inputdata.h" - -class ClassifySharedCommand : public Command { -public: - ClassifySharedCommand(); - ClassifySharedCommand(string); - ~ClassifySharedCommand() {}; - - vector setParameters(); - string getCommandName() { return "classify.shared"; } - string getCommandCategory() { return "OTU-Based Approaches"; } - string getHelpString(); - string getOutputPattern(string); - string getCitation() { return "http://www.mothur.org/wiki/Classify.shared\n"; } - string getDescription() { return "implements the random forest machine learning algorithm to identify OTUs that can be used to differentiate between various groups of samples"; } - int execute(); - - void help() { m->mothurOut(getHelpString()); } - -private: - bool abort; - string outputDir; - vector outputNames, Groups; - - string sharedfile, designfile; - set labels; - bool allLines; - - int processors; - bool useTiming; - - GroupMap designMap; - - int numDecisionTrees; - string treeSplitCriterion, optimumFeatureSubsetSelectionCriteria; - bool doPruning, discardHighErrorTrees; - double pruneAggressiveness, highErrorTreeDiscardThreshold, featureStandardDeviationThreshold; - - void processSharedAndDesignData(vector lookup); -}; - -#endif /* defined(__Mothur__classifysharedcommand__) */ diff --git a/commandfactory.cpp b/commandfactory.cpp index 03c3e49..8ab0a95 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -306,7 +306,7 @@ CommandFactory::CommandFactory(){ commands["make.table"] = "make.table"; commands["sff.multiple"] = "sff.multiple"; commands["quit"] = "MPIEnabled"; - commands["classifyrf.shared"] = "classifyrf.shared"; + commands["classify.rf"] = "classify.rf"; commands["filter.shared"] = "filter.shared"; commands["primer.design"] = "primer.design"; commands["get.dists"] = "get.dists"; @@ -533,7 +533,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); } else if(commandName == "load.logfile") { command = new LoadLogfileCommand(optionString); } else if(commandName == "sff.multiple") { command = new SffMultipleCommand(optionString); } - else if(commandName == "classifyrf.shared") { command = new ClassifyRFSharedCommand(optionString); } + else if(commandName == "classify.rf") { command = new ClassifyRFSharedCommand(optionString); } else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); } else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); } @@ -701,7 +701,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "make.contigs") { pipecommand = new MakeContigsCommand(optionString); } else if(commandName == "load.logfile") { pipecommand = new LoadLogfileCommand(optionString); } else if(commandName == "sff.multiple") { pipecommand = new SffMultipleCommand(optionString); } - else if(commandName == "classifyrf.shared") { pipecommand = new ClassifyRFSharedCommand(optionString); } + else if(commandName == "classify.rf") { pipecommand = new ClassifyRFSharedCommand(optionString); } else if(commandName == "filter.shared") { pipecommand = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { pipecommand = new PrimerDesignCommand(optionString); } else if(commandName == "get.dists") { pipecommand = new GetDistsCommand(optionString); } @@ -855,7 +855,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "make.contigs") { shellcommand = new MakeContigsCommand(); } else if(commandName == "load.logfile") { shellcommand = new LoadLogfileCommand(); } else if(commandName == "sff.multiple") { shellcommand = new SffMultipleCommand(); } - else if(commandName == "classifyrf.shared") { shellcommand = new ClassifyRFSharedCommand(); } + else if(commandName == "classify.rf") { shellcommand = new ClassifyRFSharedCommand(); } else if(commandName == "filter.shared") { shellcommand = new FilterSharedCommand(); } else if(commandName == "primer.design") { shellcommand = new PrimerDesignCommand(); } else if(commandName == "get.dists") { shellcommand = new GetDistsCommand(); } diff --git a/kruskalwalliscommand.cpp b/kruskalwalliscommand.cpp index 805d21e..777444b 100644 --- a/kruskalwalliscommand.cpp +++ b/kruskalwalliscommand.cpp @@ -15,7 +15,6 @@ vector KruskalWallisCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -34,9 +33,8 @@ string KruskalWallisCommand::getHelpString(){ try { string helpString = ""; helpString += "The kruskal.wallis command allows you to ....\n"; - helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n"; + helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n"; helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; - helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n"; helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n"; return helpString; @@ -159,9 +157,6 @@ KruskalWallisCommand::KruskalWallisCommand(string option) { mclass = validParameter.validFile(parameters, "class", false); if (mclass == "not found") { mclass = ""; } - classes = validParameter.validFile(parameters, "classes", false); - if (classes == "not found") { classes = ""; } - } } @@ -178,13 +173,6 @@ int KruskalWallisCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } DesignMap designMap(designfile); - //if user set classes set groups=those classes - if (classes != "") { - map > thisClasses = m->parseClasses(classes); - vector groups = designMap.getNamesUnique(thisClasses); - if (groups.size() != 0) { m->setGroups(groups); } - else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; } - } //if user did not select class use first column if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } diff --git a/kruskalwalliscommand.h b/kruskalwalliscommand.h index a82d208..73ef643 100644 --- a/kruskalwalliscommand.h +++ b/kruskalwalliscommand.h @@ -42,7 +42,7 @@ public: private: bool abort, allLines; - string outputDir, sharedfile, designfile, mclass, classes; + string outputDir, sharedfile, designfile, mclass; vector outputNames; set labels; diff --git a/lefsecommand.cpp b/lefsecommand.cpp index 3575e02..8d1768c 100644 --- a/lefsecommand.cpp +++ b/lefsecommand.cpp @@ -17,7 +17,7 @@ vector LefseCommand::setParameters(){ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); + //CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha); @@ -58,7 +58,7 @@ string LefseCommand::getHelpString(){ try { string helpString = ""; helpString += "The lefse command allows you to ....\n"; - helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n"; + helpString += "The lefse command parameters are: shared, design, class, subclass, label, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n"; helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; helpString += "The subclass parameter is used to indicate the .....If none is provided, second category is used, or if only one category subclass is ignored. \n"; helpString += "The aalpha parameter is used to set the alpha value for the Krukal Wallis Anova test Default=0.05. \n"; @@ -67,13 +67,13 @@ string LefseCommand::getHelpString(){ helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n"; helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n"; //helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n"; - helpString += "The curv parameter is used to set whether perform the wilcoxon test ing the Curtis's approach [BETA VERSION] Default=F. \n"; + helpString += "The curv parameter is used to set whether perform the wilcoxon testing the Curtis's approach [BETA VERSION] Default=F. \n"; helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n"; helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n"; helpString += "The strict parameter is used to set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison. Options = 0,1,2. Default=0. \n"; helpString += "The minc parameter is used to minimum number of samples per subclass for performing wilcoxon test. Default=10. \n"; helpString += "The multiclass parameter is used to (for multiclass tasks) set whether the test is performed in a one-against-one ( onevone - more strict!) or in a one-against-all setting ( onevall - less strict). Default=onevall. \n"; - helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; + //helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n"; helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n"; return helpString; @@ -199,9 +199,6 @@ LefseCommand::LefseCommand(string option) { subclass = validParameter.validFile(parameters, "subclass", false); if (subclass == "not found") { subclass = mclass; } - classes = validParameter.validFile(parameters, "classes", false); - if (classes == "not found") { classes = ""; } - string temp = validParameter.validFile(parameters, "aalpha", false); if (temp == "not found") { temp = "0.05"; } m->mothurConvert(temp, anovaAlpha); @@ -266,22 +263,16 @@ LefseCommand::LefseCommand(string option) { int LefseCommand::execute(){ try { + srand(1982); //for reading lefse formatted file and running in mothur for testing - pass number of rows used for design file if (false) { makeShared(1); exit(1); } if (abort == true) { if (calledHelp) { return 0; } return 2; } DesignMap designMap(designfile); - //if user set classes set groups=those classes - if (classes != "") { - map > thisClasses = m->parseClasses(classes); - vector groups = designMap.getNamesUnique(thisClasses); - if (groups.size() != 0) { m->setGroups(groups); } - else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; } - } //if user did not select class use first column - if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } + if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); if (subclass == "") { subclass = mclass; } } InputData input(sharedfile, "sharedfile"); vector lookup = input.getSharedRAbundFloatVectors(); @@ -365,6 +356,7 @@ int LefseCommand::execute(){ m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); + srand(time(NULL)); return 0; } @@ -383,8 +375,8 @@ int LefseCommand::process(vector& lookup, DesignMap& d map > class2SubClasses; //maps class name to vector of its subclasses map > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. map > class2GroupIndex; //maps subclass name to vector of indexes in lookup from that class. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. + if (normMillion) { normalize(lookup); } for (int j = 0; j < lookup.size(); j++) { - if (normMillion) { normalize(lookup); } string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category string thisSub = designMap.get(group, subclass); @@ -426,12 +418,16 @@ int LefseCommand::process(vector& lookup, DesignMap& d int numSigBeforeWilcox = significantOtuLabels.size(); + if (m->debug) { m->mothurOut("[DEBUG]: completed Kruskal Wallis\n"); } + //check for subclass string wilcoxString = ""; if ((subclass != "") && wilc) { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels, class2SubClasses, subClass2GroupIndex, subclass2Class); wilcoxString += " ( " + toString(numSigBeforeWilcox) + " ) before internal wilcoxon"; } int numSigAfterWilcox = significantOtuLabels.size(); + if (m->debug) { m->mothurOut("[DEBUG]: completed Wilcoxon\n"); } + m->mothurOut("\nNumber of significantly discriminative features: " + toString(numSigAfterWilcox) + wilcoxString + ".\n"); map sigOTUSLDA; @@ -441,6 +437,8 @@ int LefseCommand::process(vector& lookup, DesignMap& d } else { m->mothurOut("No features with significant differences between the classes.\n"); } + if (m->debug) { m->mothurOut("[DEBUG]: completed lda\n"); } + printResults(means, significantOtuLabels, sigOTUSLDA, lookup[0]->getLabel(), classes); return 0; @@ -461,7 +459,9 @@ int LefseCommand::normalize(vector& lookup) { } for (int i = 0; i < lookup.size(); i++) { - for (int j = 0; j < lookup[i]->getNumBins(); j++) { lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); } + for (int j = 0; j < lookup[i]->getNumBins(); j++) { + lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); + } } return 0; @@ -713,15 +713,22 @@ map LefseCommand::testLDA(vector& lookup, for (int i = 0; i < numBins; i++) { if (m->control_pressed) { break; } + if (m->debug) { m->mothurOut("[DEBUG]: bin = " + toString(i) + "\n."); } + it = bins.find(i); if (it != bins.end()) { //flagged in Kruskal Wallis and Wilcoxon(if we ran it) + if (m->debug) { m->mothurOut("[DEBUG]:flagged bin = " + toString(i) + "\n."); } + //fill x with this OTUs abundances vector x; for (int j = 0; j < lookup.size(); j++) { x.push_back(lookup[j]->getAbundance(i)); } //go through classes for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { + + if (m->debug) { m->mothurOut("[DEBUG]: class = " + it->first + "\n."); } + //max(float(feats['class'].count(c))*0.5,4) //max(numGroups in this class*0.5, 4.0) double necessaryNum = ((double)((it->second).size())*0.5); @@ -760,10 +767,14 @@ map LefseCommand::testLDA(vector& lookup, minCl = (int)((float)(minCl*fBoots*fBoots*0.05)); minCl = max(minCl, 1); + if (m->debug) { m->mothurOut("[DEBUG]: about to start iters. \n."); } + vector< vector< vector > > results;//[iters][numComparison][numOTUs] for (int j = 0; j < iters; j++) { if (m->control_pressed) { return sigOTUS; } + if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(j) + "\n."); } + //find "good" random vector vector rand_s; for (int h = 0; h < 1000; h++) { //generate a vector of length fractionNumGroups with range 0 to numGroups-1 diff --git a/lefsecommand.h b/lefsecommand.h index 1c5ac6f..cb1bbb6 100644 --- a/lefsecommand.h +++ b/lefsecommand.h @@ -41,7 +41,7 @@ public: string getOutputPattern(string); string getHelpString(); - string getCitation() { return "http://www.mothur.org/wiki/Lefse"; } + string getCitation() { return "Segata, N., J. Izard, L. Waldron, D. Gevers, L. Miropolsky, W. S. Garrett, and C. Huttenhower. 2011. Metagenomic biomarker discovery and explanation. Genome Biol 12:R60, http://www.mothur.org/wiki/Lefse"; } string getDescription() { return "brief description"; } int execute(); @@ -49,7 +49,7 @@ public: private: bool abort, allLines, wilc, wilcsamename, curv, subject, normMillion; - string outputDir, sharedfile, designfile, mclass, subclass, classes, rankTec, multiClassStrat; + string outputDir, sharedfile, designfile, mclass, subclass, rankTec, multiClassStrat; vector outputNames; set labels; double anovaAlpha, wilcoxonAlpha, fBoots, ldaThreshold; diff --git a/linearalgebra.cpp b/linearalgebra.cpp index fdd95ec..b208aab 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -10,6 +10,8 @@ #include "linearalgebra.h" #include "wilcox.h" +#define PI 3.1415926535897932384626433832795 + // This class references functions used from "Numerical Recipes in C++" // /*********************************************************************************************************************************/ @@ -1320,24 +1322,14 @@ double LinearAlgebra::calcKruskalWallis(vector& values, double& pV } } /*********************************************************************************************************************************/ -//python random.normalvariate - thanks http://madssj.com/blog/2008/05/07/porting-normalvariate-from-python-to-c/ -double LinearAlgebra::normalvariate(double mu, double sigma) { +double LinearAlgebra::normalvariate(double mean, double standardDeviation) { try { - double NV_MAGICCONST = 1.7155277699214135; /* (4 * exp(-0.5) / sqrt(2.0)); */ - unsigned long int MAX_RANDOM = 2147483647; /* (2 ** 31) - 1; */ - - double u1, u2, z, zz; - for (;;) { - if (m->control_pressed) { break; } - u1 = ((float)random()) / MAX_RANDOM; - u2 = 1.0 - (((float)random()) / MAX_RANDOM); - z = NV_MAGICCONST * (u1 - 0.5) / u2; - zz = z * z / 4.0; - if (zz <= -(log(u2))) { - break; - } - } - return mu + z * sigma; + double u1 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0); + double u2 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0); + //double r = sqrt( -2.0*log(u1) ); + //double theta = 2.0*PI*u2; + //cout << cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)) << endl; + return cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "normalvariate"); @@ -1367,6 +1359,7 @@ double LinearAlgebra::pnorm(double x){ double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x); return 0.5*(1.0 + sign*y); + } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "pnorm"); diff --git a/makebiomcommand.cpp b/makebiomcommand.cpp index a101883..7cf8295 100644 --- a/makebiomcommand.cpp +++ b/makebiomcommand.cpp @@ -93,11 +93,13 @@ vector MakeBiomCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared); - CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy); + CommandParameter pcontaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy); + //CommandParameter preference("referencetax", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preference); CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + //CommandParameter ppicrust("picrust", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppicrust); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype); @@ -114,12 +116,14 @@ vector MakeBiomCommand::setParameters(){ string MakeBiomCommand::getHelpString(){ try { string helpString = ""; - helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label. shared is required, unless you have a valid current file.\n"; + helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label. shared is required, unless you have a valid current file.\n"; //, picrust and referencetax helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n"; helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n"; helpString += "The metadata parameter is used to provide experimental parameters to the columns. Things like 'sample1 gut human_gut'. \n"; + //helpString += "The picrust parameter is used to indicate the biom file is for input to picrust. NOTE: Picrust requires a greengenes taxonomy. \n"; + //helpString += "The referencetax parameter is used with the picrust parameter. Picrust requires the name of the reference taxonomy sequence to be in the biom file. \n"; helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n"; helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; @@ -202,12 +206,20 @@ MakeBiomCommand::MakeBiomCommand(string option) { if (path == "") { parameters["shared"] = inputDir + it->second; } } - it = parameters.find("contaxonomy"); + it = parameters.find("constaxonomy"); //user has given a template file if(it != parameters.end()){ path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["contaxonomy"] = inputDir + it->second; } + if (path == "") { parameters["constaxonomy"] = inputDir + it->second; } + } + + it = parameters.find("referencetax"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["referencetax"] = inputDir + it->second; } } it = parameters.find("metadata"); @@ -233,9 +245,13 @@ MakeBiomCommand::MakeBiomCommand(string option) { //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); } - contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true); + contaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true); if (contaxonomyfile == "not found") { contaxonomyfile = ""; } else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; } + + //referenceTax = validParameter.validFile(parameters, "referencetax", true); + //if (referenceTax == "not found") { referenceTax = ""; } + //else if (referenceTax == "not open") { referenceTax = ""; abort = true; } metadatafile = validParameter.validFile(parameters, "metadata", true); if (metadatafile == "not found") { metadatafile = ""; } @@ -249,6 +265,13 @@ MakeBiomCommand::MakeBiomCommand(string option) { if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } + + //string temp = validParameter.validFile(parameters, "picrust", false); if (temp == "not found"){ temp = "f"; } + //picrust = m->isTrue(temp); + //if (picrust && ((contaxonomyfile == "") || (referenceTax == ""))) { + //m->mothurOut("[ERROR]: the picrust parameter requires a consensus taxonomy with greengenes taxonomy the reference."); m->mothurOutEndLine(); abort = true; + //} + picrust=false; groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } @@ -406,7 +429,8 @@ int MakeBiomCommand::getBiom(vector& lookup){ out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n"; int numBins = lookup[0]->getNumBins(); - vector metadata = getMetaData(lookup); + vector picrustLabels; + vector metadata = getMetaData(lookup, picrustLabels); if (m->control_pressed) { out.close(); return 0; } @@ -423,10 +447,11 @@ int MakeBiomCommand::getBiom(vector& lookup){ string rowBack = "\", \"metadata\":"; for (int i = 0; i < numBins-1; i++) { if (m->control_pressed) { out.close(); return 0; } - out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n"; + if (!picrust) { out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n"; } + else { out << rowFront << picrustLabels[i] << rowBack << metadata[i] << "},\n"; } } - out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; - + if (!picrust) { out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; } + else { out << rowFront << picrustLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; } //get column info /*"columns": [ {"id":"Sample1", "metadata":null}, @@ -446,7 +471,7 @@ int MakeBiomCommand::getBiom(vector& lookup){ out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n"; out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n"; - out << spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n"; + out << spaces + "\"shape\": [" << numBins << "," << lookup.size() << "],\n"; out << spaces + "\"data\": ["; vector dataRows; @@ -518,7 +543,7 @@ int MakeBiomCommand::getBiom(vector& lookup){ } } //********************************************************************************************************************** -vector MakeBiomCommand::getMetaData(vector& lookup){ +vector MakeBiomCommand::getMetaData(vector& lookup, vector& picrustLabels){ try { vector metadata; @@ -581,7 +606,7 @@ vector MakeBiomCommand::getMetaData(vector& lookup) //traverse the binLabels forming the metadata strings and saving them //make sure to sanity check map::iterator it; - for (int i = 0; i < m->currentBinLabels.size(); i++) { + for (int i = 0; i < lookup[0]->getNumBins(); i++) { if (m->control_pressed) { return metadata; } @@ -589,6 +614,10 @@ vector MakeBiomCommand::getMetaData(vector& lookup) if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; } else { + if (picrust) { + string temp = it->second; m->removeConfidences(temp); + picrustLabels.push_back(temp); + } vector bootstrapValues; string data = "{\"taxonomy\":["; diff --git a/makebiomcommand.h b/makebiomcommand.h index 7ea381f..1ff3985 100644 --- a/makebiomcommand.h +++ b/makebiomcommand.h @@ -36,14 +36,14 @@ public: private: - string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label; + string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label, referenceTax; vector outputNames, Groups, sampleMetadata; set labels; - bool abort, allLines; + bool abort, allLines, picrust; int getBiom(vector&); - vector getMetaData(vector&); + vector getMetaData(vector&, vector&); vector parseTax(string tax, vector& scores); int getSampleMetaData(vector&); }; diff --git a/makefile b/makefile index 04de533..7fc8608 100644 --- a/makefile +++ b/makefile @@ -15,8 +15,8 @@ USEREADLINE ?= yes CYGWIN_BUILD ?= no USECOMPRESSION ?= no MOTHUR_FILES="\"Enter_your_default_path_here\"" -RELEASE_DATE = "\"8/19/2013\"" -VERSION = "\"1.31.2\"" +RELEASE_DATE = "\"10/01/2013\"" +VERSION = "\"1.32.0\"" FORTAN_COMPILER = gfortran FORTRAN_FLAGS = diff --git a/makelefsecommand.cpp b/makelefsecommand.cpp index 9f895f6..73f9db2 100644 --- a/makelefsecommand.cpp +++ b/makelefsecommand.cpp @@ -43,8 +43,8 @@ string MakeLefseCommand::getHelpString(){ helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n"; helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n"; helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n"; - helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n"; - helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n"; + helpString += "The make.lefse command should be in the following format: make.lefse(shared=yourSharedFile)\n"; + helpString += "make.lefse(shared=final.an.shared)\n"; return helpString; } catch(exception& e) { diff --git a/renameseqscommand.cpp b/renameseqscommand.cpp index 449454b..27999d0 100644 --- a/renameseqscommand.cpp +++ b/renameseqscommand.cpp @@ -15,7 +15,7 @@ vector RenameSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,true,true); parameters.push_back(pgroup); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index e7b2c6a..e40c412 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -2177,19 +2177,18 @@ int ScreenSeqsCommand::screenCountFile(map badSeqNames){ if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; } in >> name; m->gobble(in); - in >> thisTotal; m->gobble(in); + in >> thisTotal; rest = m->getline(in); m->gobble(in); it = badSeqNames.find(name); if(it != badSeqNames.end()){ - badSeqNames.erase(it); + badSeqNames.erase(it); } else{ goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl; } } - if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; } //we were unable to remove some of the bad sequences diff --git a/setlogfilecommand.cpp b/setlogfilecommand.cpp index 41c8e74..329fd1f 100644 --- a/setlogfilecommand.cpp +++ b/setlogfilecommand.cpp @@ -91,7 +91,7 @@ int SetLogFileCommand::execute(){ if (directory == "") { commandFactory->setLogfileName(name, append); }else if (m->dirCheck(directory)) { - commandFactory->setLogfileName(name, append); + commandFactory->setLogfileName(name, append); } return 0; diff --git a/sharedjest.cpp b/sharedjest.cpp index 9a1cfbb..237821a 100644 --- a/sharedjest.cpp +++ b/sharedjest.cpp @@ -34,12 +34,18 @@ EstOutput Jest::getValues(vector shared) { *chaoS1Sabund = shared[0]->getSAbundVector(); *chaoS2Sabund = shared[1]->getSAbundVector(); + + //chaoS1Sabund->print(cout); + //chaoS2Sabund->print(cout); S12 = sharedChao->getValues(shared); S1 = chaoS1->getValues(chaoS1Sabund); S2 = chaoS2->getValues(chaoS2Sabund); + + //cout << S12[0] << '\t' << S1[0] << '\t' << S2[0] << endl; data[0] = 1.0 - S12[0] / (float)(S1[0] + S2[0] - S12[0]); + //cout << data[0] << endl; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } -- 2.39.2