]> git.donarmstrong.com Git - mothur.git/commitdiff
changes while testing
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 2 Oct 2013 11:23:27 +0000 (07:23 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 2 Oct 2013 11:23:27 +0000 (07:23 -0400)
20 files changed:
Mothur.xcodeproj/project.pbxproj
chao1.cpp
classifyrfsharedcommand.cpp
classifyrfsharedcommand.h
classifysharedcommand.cpp [deleted file]
classifysharedcommand.h [deleted file]
commandfactory.cpp
kruskalwalliscommand.cpp
kruskalwalliscommand.h
lefsecommand.cpp
lefsecommand.h
linearalgebra.cpp
makebiomcommand.cpp
makebiomcommand.h
makefile
makelefsecommand.cpp
renameseqscommand.cpp
screenseqscommand.cpp
setlogfilecommand.cpp
sharedjest.cpp

index 8304f072147391d834a1a82f002ee60a697c04fe..4fb82f5e8acacfb590ac2ac4b44620b70566ad87 100644 (file)
@@ -31,7 +31,6 @@
                A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
                A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
                A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
-               A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C211619CCE600651424 /* classifysharedcommand.cpp */; };
                A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
                A7386C29161A110800651424 /* decisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C28161A110700651424 /* decisiontree.cpp */; };
                A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73901071588C40900ED2ED6 /* loadlogfilecommand.cpp */; };
                A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; };
                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; };
                A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; };
+               A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F24FC117EA365F0021DC9A /* classifyrfsharedcommand.cpp */; };
                A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */; };
                A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp */; };
                A7FA2AC714A0E881007C09A6 /* bsplvb.f in Sources */ = {isa = PBXBuildFile; fileRef = A7FA2ABC14A0E881007C09A6 /* bsplvb.f */; };
                A7386C1E1619CACB00651424 /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = "<group>"; };
                A7386C1F1619CACB00651424 /* randomforest.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = randomforest.hpp; sourceTree = "<group>"; };
                A7386C201619CACB00651424 /* rftreenode.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = rftreenode.hpp; sourceTree = "<group>"; };
-               A7386C211619CCE600651424 /* classifysharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifysharedcommand.cpp; sourceTree = "<group>"; };
-               A7386C221619CCE600651424 /* classifysharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifysharedcommand.h; sourceTree = "<group>"; };
                A7386C241619E52200651424 /* abstractdecisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = abstractdecisiontree.cpp; sourceTree = "<group>"; };
                A7386C28161A110700651424 /* decisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = decisiontree.cpp; sourceTree = "<group>"; };
                A73901051588C3EF00ED2ED6 /* loadlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = loadlogfilecommand.h; sourceTree = "<group>"; };
                A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
                A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifytreecommand.cpp; sourceTree = "<group>"; };
                A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifytreecommand.h; sourceTree = "<group>"; };
+               A7F24FC117EA365F0021DC9A /* classifyrfsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyrfsharedcommand.cpp; sourceTree = "<group>"; };
+               A7F24FC217EA365F0021DC9A /* classifyrfsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyrfsharedcommand.h; sourceTree = "<group>"; };
                A7F9F5CD141A5E500032F693 /* sequenceparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequenceparser.h; sourceTree = "<group>"; };
                A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequenceparser.cpp; sourceTree = "<group>"; };
                A7FA10001302E096003860FE /* mantelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mantelcommand.h; sourceTree = "<group>"; };
                                A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */,
                                A7E9B69312D37EC400DA6239 /* classifyseqscommand.h */,
                                A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */,
-                               A7386C221619CCE600651424 /* classifysharedcommand.h */,
-                               A7386C211619CCE600651424 /* classifysharedcommand.cpp */,
+                               A7F24FC217EA365F0021DC9A /* classifyrfsharedcommand.h */,
+                               A7F24FC117EA365F0021DC9A /* classifyrfsharedcommand.cpp */,
                                A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */,
                                A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */,
                                A7E9B69712D37EC400DA6239 /* clearcutcommand.h */,
                                A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */,
                                A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */,
                                A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */,
-                               A7386C231619CCE600651424 /* classifysharedcommand.cpp in Sources */,
                                A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */,
                                A7386C29161A110800651424 /* decisiontree.cpp in Sources */,
                                A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */,
                                A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */,
                                A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */,
                                A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
+                               A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                GCC_MODEL_TUNING = "";
                                GCC_OPTIMIZATION_LEVEL = s;
                                GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.31.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"5/24/2013\\\"\"",
+                                       "VERSION=\"\\\"1.32.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"10/01/2013\\\"\"",
                                );
                                GCC_VERSION = "";
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
index ad1faa242b81c1520d29fb6395fa5f5825d179ec..d7ec7ea7015da078452562bd526d56944d590560 100644 (file)
--- a/chao1.cpp
+++ b/chao1.cpp
@@ -29,7 +29,7 @@ EstOutput Chao1::getValues(SAbundVector* rank){
                }else{ doubles = 0.0;  }
 
                double chaovar = 0.0000;
-//cout << "singles = " << singles << " doubles = " << doubles << " sobs = " << sobs << endl;   
+//cout << "singles = " << singles << " doubles = " << doubles << " sobs = " << sobs << endl;
                double chao = sobs + singles*(singles-1)/(2*(doubles+1));
        
                if(singles > 0 && doubles > 0){
index d2cd9f9d38fd9ded1d04775ff594c89a0dc42cec..efee5e3265d78c726a2174ac778889f769c65f25 100755 (executable)
@@ -47,12 +47,12 @@ vector<string> ClassifyRFSharedCommand::setParameters(){
 string ClassifyRFSharedCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The classify.shared command allows you to ....\n";
-               helpString += "The classify.shared command parameters are: shared, design, label, groups, otupersplit.\n";
+               helpString += "The classify.rf command allows you to ....\n";
+               helpString += "The classify.rf command parameters are: shared, design, label, groups, otupersplit.\n";
         helpString += "The label parameter is used to analyze specific labels in your input.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your designfile you would like analyzed.\n";
-               helpString += "The classify.shared should be in the following format: \n";
-               helpString += "classify.shared(shared=yourSharedFile, design=yourDesignFile)\n";
+               helpString += "The classify.rf should be in the following format: \n";
+               helpString += "classify.rf(shared=yourSharedFile, design=yourDesignFile)\n";
                return helpString;
        }
        catch(exception& e) {
index 6a948b2fc0d41d91e9a26898d2aa55687fd050b8..7b6ca132f6bacd2d93e0c3aa89669954089b9fce 100755 (executable)
@@ -19,11 +19,11 @@ public:
   ~ClassifyRFSharedCommand() {};
   
   vector<string> setParameters();
-  string getCommandName()                      { return "classifyrf.shared";     }
+  string getCommandName()                      { return "classify.rf";     }
   string getCommandCategory()          { return "OTU-Based Approaches";                }  
   string getHelpString();      
   string getOutputPattern(string);
-  string getCitation() { return "http://www.mothur.org/wiki/Classifyrf.shared\n"; }
+  string getCitation() { return "http://www.mothur.org/wiki/Classify.rf\n"; }
   string getDescription()              { return "implements the random forest machine learning algorithm to identify OTUs that can be used to differentiate between various groups of samples"; }
   int execute();
   
diff --git a/classifysharedcommand.cpp b/classifysharedcommand.cpp
deleted file mode 100755 (executable)
index c7eb6cd..0000000
+++ /dev/null
@@ -1,407 +0,0 @@
-//
-//  classifysharedcommand.cpp
-//  Mothur
-//
-//  Created by Abu Zaher Md. Faridee on 8/13/12.
-//  Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-#include "classifysharedcommand.h"
-#include "randomforest.hpp"
-#include "decisiontree.hpp"
-#include "rftreenode.hpp"
-
-//**********************************************************************************************************************
-vector<string> ClassifySharedCommand::setParameters(){ 
-       try {
-               //CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);        
-        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);             
-        CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);    
-        CommandParameter potupersplit("otupersplit", "Multiple", "log2-squareroot", "log2", "", "", "","",false,false); parameters.push_back(potupersplit);
-        CommandParameter psplitcriteria("splitcriteria", "Multiple", "gainratio-infogain", "gainratio", "", "", "","",false,false); parameters.push_back(psplitcriteria);
-               CommandParameter pnumtrees("numtrees", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pnumtrees);
-        
-            // parameters related to pruning
-        CommandParameter pdopruning("prune", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdopruning);
-        CommandParameter ppruneaggrns("pruneaggressiveness", "Number", "", "0.9", "", "", "", "", false, false); parameters.push_back(ppruneaggrns);
-        CommandParameter pdiscardhetrees("discarderrortrees", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdiscardhetrees);
-        CommandParameter phetdiscardthreshold("errorthreshold", "Number", "", "0.4", "", "", "", "", false, false); parameters.push_back(phetdiscardthreshold);
-        CommandParameter psdthreshold("stdthreshold", "Number", "", "0.0", "", "", "", "", false, false); parameters.push_back(psdthreshold);
-            // pruning params end
-
-        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
-               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-               
-               vector<string> myArray;
-               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifySharedCommand", "setParameters");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-string ClassifySharedCommand::getHelpString(){ 
-       try {
-               string helpString = "";
-               helpString += "The classify.shared command allows you to ....\n";
-               helpString += "The classify.shared command parameters are: shared, design, label, groups, otupersplit.\n";
-        helpString += "The label parameter is used to analyze specific labels in your input.\n";
-               helpString += "The groups parameter allows you to specify which of the groups in your designfile you would like analyzed.\n";
-               helpString += "The classify.shared should be in the following format: \n";
-               helpString += "classify.shared(shared=yourSharedFile, design=yourDesignFile)\n";
-               return helpString;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifySharedCommand", "getHelpString");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-string ClassifySharedCommand::getOutputPattern(string type) {
-    try {
-        string pattern = "";
-        
-        if (type == "summary") {  pattern = "[filename],[distance],summary"; } //makes file like: amazon.0.03.fasta
-        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
-        
-        return pattern;
-    }
-    catch(exception& e) {
-        m->errorOut(e, "ClassifySharedCommand", "getOutputPattern");
-        exit(1);
-    }
-}
-//**********************************************************************************************************************
-
-ClassifySharedCommand::ClassifySharedCommand() {
-  try {
-    abort = true; calledHelp = true;
-    setParameters();
-    vector<string> tempOutNames;
-    outputTypes["summary"] = tempOutNames; 
-  }
-  catch(exception& e) {
-    m->errorOut(e, "ClassifySharedCommand", "ClassifySharedCommand");
-    exit(1);
-  }
-}
-
-//**********************************************************************************************************************
-ClassifySharedCommand::ClassifySharedCommand(string option) {
-  try {
-    abort = false; calledHelp = false;
-    allLines = 1;
-      
-      //allow user to run help
-    if(option == "help") { help(); abort = true; calledHelp = true; }
-    else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-    
-    else {
-        //valid paramters for this command
-      vector<string> myArray = setParameters();
-      
-      OptionParser parser(option);
-      map<string,string> parameters = parser.getParameters();
-      
-      ValidParameters validParameter;
-      map<string,string>::iterator it;
-        //check to make sure all parameters are valid for command
-      for (it = parameters.begin(); it != parameters.end(); it++) {
-        if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
-      }
-        vector<string> tempOutNames;
-        outputTypes["summary"] = tempOutNames;
-      
-        //if the user changes the input directory command factory will send this info to us in the output parameter
-      string inputDir = validParameter.validFile(parameters, "inputdir", false);
-      if (inputDir == "not found"){    inputDir = "";          }
-      else {
-        string path;
-        it = parameters.find("shared");
-          //user has given a shared file
-        if(it != parameters.end()){
-          path = m->hasPath(it->second);
-            //if the user has not given a path then, add inputdir. else leave path alone.
-          if (path == "") {    parameters["shared"] = inputDir + it->second;           }
-        }
-        
-        it = parameters.find("design");
-          //user has given a design file
-        if(it != parameters.end()){
-          path = m->hasPath(it->second);
-            //if the user has not given a path then, add inputdir. else leave path alone.
-          if (path == "") {    parameters["design"] = inputDir + it->second;           }
-        }
-        
-      }
-        //check for parameters
-        //get shared file, it is required
-      sharedfile = validParameter.validFile(parameters, "shared", true);
-      if (sharedfile == "not open") { sharedfile = ""; abort = true; }
-      else if (sharedfile == "not found") {
-          //if there is a current shared file, use it
-        sharedfile = m->getSharedFile();
-        if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
-        else {         m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
-      }else { m->setSharedFile(sharedfile); }
-      
-        //get design file, it is required
-      designfile = validParameter.validFile(parameters, "design", true);
-      if (designfile == "not open") { sharedfile = ""; abort = true; }
-      else if (designfile == "not found") {
-          //if there is a current shared file, use it
-        designfile = m->getDesignFile();
-        if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
-        else {         m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
-      }else { m->setDesignFile(designfile); }
-
-      
-        //if the user changes the output directory command factory will send this info to us in the output parameter
-      outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){
-        outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
-      }
-      
-        // NEW CODE for OTU per split selection criteria
-        string temp = validParameter.validFile(parameters, "splitcriteria", false);
-        if (temp == "not found") { temp = "gainratio"; }
-        if ((temp == "gainratio") || (temp == "infogain")) {
-            treeSplitCriterion = temp;
-        } else { m->mothurOut("Not a valid tree splitting criterio. Valid tree splitting criteria are 'gainratio' and 'infogain'.");
-            m->mothurOutEndLine();
-            abort = true;
-        }
-        
-        temp = validParameter.validFile(parameters, "numtrees", false); if (temp == "not found"){      temp = "100";   }
-        m->mothurConvert(temp, numDecisionTrees);
-        
-            // parameters for pruning
-        temp = validParameter.validFile(parameters, "prune", false);
-        if (temp == "not found") { temp = "f"; }
-        doPruning = m->isTrue(temp);
-        
-        temp = validParameter.validFile(parameters, "pruneaggressiveness", false);
-        if (temp == "not found") { temp = "0.9"; }
-        m->mothurConvert(temp, pruneAggressiveness);
-        
-        temp = validParameter.validFile(parameters, "discarderrortrees", false);
-        if (temp == "not found") { temp = "f"; }
-        discardHighErrorTrees = m->isTrue(temp);
-        
-        temp = validParameter.validFile(parameters, "errorthreshold", false);
-        if (temp == "not found") { temp = "0.4"; }
-        m->mothurConvert(temp, highErrorTreeDiscardThreshold);
-        
-        temp = validParameter.validFile(parameters, "otupersplit", false);
-        if (temp == "not found") { temp = "log2"; }
-        if ((temp == "squareroot") || (temp == "log2")) {
-            optimumFeatureSubsetSelectionCriteria = temp;
-        } else { m->mothurOut("Not a valid OTU per split selection method. Valid OTU per split selection methods are 'log2' and 'squareroot'.");
-            m->mothurOutEndLine();
-            abort = true;
-        }
-        
-        temp = validParameter.validFile(parameters, "stdthreshold", false);
-        if (temp == "not found") { temp = "0.0"; }
-        m->mothurConvert(temp, featureStandardDeviationThreshold);
-                        
-            // end of pruning params
-        
-        //Groups must be checked later to make sure they are valid. SharedUtilities has functions of check the validity, just make to so m->setGroups() after the checks.  If you are using these with a shared file no need to check the SharedRAbundVector class will call SharedUtilites for you, kinda nice, huh?
-      string groups = validParameter.validFile(parameters, "groups", false);
-      if (groups == "not found") { groups = ""; }
-      else { m->splitAtDash(groups, Groups); }
-      m->setGroups(Groups);
-      
-        //Commonly used to process list, rabund, sabund, shared and relabund files.  Look at "smart distancing" examples below in the execute function.
-      string label = validParameter.validFile(parameters, "label", false);
-      if (label == "not found") { label = ""; }
-      else {
-        if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
-        else { allLines = 1;  }
-      }
-    }
-    
-  }
-  catch(exception& e) {
-    m->errorOut(e, "ClassifySharedCommand", "ClassifySharedCommand");
-    exit(1);
-  }
-}
-//**********************************************************************************************************************
-int ClassifySharedCommand::execute() {
-  try {
-    
-    if (abort == true) { if (calledHelp) { return 0; }  return 2;      }
-    
-    InputData input(sharedfile, "sharedfile");
-    vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
-        
-    //read design file
-    designMap.readDesignMap(designfile);
-    
-    string lastLabel = lookup[0]->getLabel();
-    set<string> processedLabels;
-    set<string> userLabels = labels;
-    
-      //as long as you are not at the end of the file or done wih the lines you want
-    while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
-      
-      if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
-      
-      if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
-        
-        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-        
-        processSharedAndDesignData(lookup);  
-          
-        processedLabels.insert(lookup[0]->getLabel());
-        userLabels.erase(lookup[0]->getLabel());
-      }
-      
-      if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-        string saveLabel = lookup[0]->getLabel();
-        
-        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-        lookup = input.getSharedRAbundVectors(lastLabel);
-        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-        processSharedAndDesignData(lookup);        
-        
-        processedLabels.insert(lookup[0]->getLabel());
-        userLabels.erase(lookup[0]->getLabel());
-        
-          //restore real lastlabel to save below
-        lookup[0]->setLabel(saveLabel);
-      }
-      
-      lastLabel = lookup[0]->getLabel();
-        //prevent memory leak
-      for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
-      
-      if (m->control_pressed) { return 0; }
-      
-        //get next line to process
-      lookup = input.getSharedRAbundVectors();
-    }
-    
-    if (m->control_pressed) {  return 0; }
-    
-      //output error messages about any remaining user labels
-    set<string>::iterator it;
-    bool needToRun = false;
-    for (it = userLabels.begin(); it != userLabels.end(); it++) {
-      m->mothurOut("Your file does not include the label " + *it);
-      if (processedLabels.count(lastLabel) != 1) {
-        m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
-        needToRun = true;
-      }else {
-        m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
-      }
-    }
-    
-      //run last label if you need to
-    if (needToRun == true)  {
-      for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
-      lookup = input.getSharedRAbundVectors(lastLabel);
-      
-      m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-      
-      processSharedAndDesignData(lookup);  
-        
-      for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
-      
-    }
-
-      m->mothurOutEndLine();
-      m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-      for (int i = 0; i < outputNames.size(); i++) {   m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-      m->mothurOutEndLine();
-      
-    return 0;
-    
-  }
-  catch(exception& e) {
-    m->errorOut(e, "ClassifySharedCommand", "execute");
-    exit(1);
-  }
-}
-//**********************************************************************************************************************
-
-void ClassifySharedCommand::processSharedAndDesignData(vector<SharedRAbundVector*> lookup){  
-    try {
-//    for (int i = 0; i < designMap->getNamesOfGroups().size(); i++) {
-//      string groupName = designMap->getNamesOfGroups()[i];
-//      cout << groupName << endl;
-//    }
-
-//    for (int i = 0; i < designMap->getNumSeqs(); i++) {
-//      string sharedGroupName = designMap->getNamesSeqs()[i];
-//      string treatmentName = designMap->getGroup(sharedGroupName);
-//      cout << sharedGroupName << " : " << treatmentName <<  endl;
-//    }
-  
-        map<string, int> treatmentToIntMap;
-        map<int, string> intToTreatmentMap;
-        for (int  i = 0; i < designMap.getNumGroups(); i++) {
-            string treatmentName = designMap.getNamesOfGroups()[i];
-            treatmentToIntMap[treatmentName] = i;
-            intToTreatmentMap[i] = treatmentName;
-        }
-        
-        int numSamples = lookup.size();
-        int numFeatures = lookup[0]->getNumBins();
-        
-        int numRows = numSamples;
-        int numColumns = numFeatures + 1;           // extra one space needed for the treatment/outcome
-        
-        vector< vector<int> > dataSet(numRows, vector<int>(numColumns, 0));
-        
-        vector<string> names;
-        
-        for (int i = 0; i < lookup.size(); i++) {
-            string sharedGroupName = lookup[i]->getGroup();
-            names.push_back(sharedGroupName);
-            string treatmentName = designMap.getGroup(sharedGroupName);
-            
-            int j = 0;
-            for (; j < lookup[i]->getNumBins(); j++) {
-                int otuCount = lookup[i]->getAbundance(j);
-                dataSet[i][j] = otuCount;
-            }
-            dataSet[i][j] = treatmentToIntMap[treatmentName];
-        }
-        
-        RandomForest randomForest(dataSet, numDecisionTrees, treeSplitCriterion, doPruning, pruneAggressiveness, discardHighErrorTrees, highErrorTreeDiscardThreshold, optimumFeatureSubsetSelectionCriteria, featureStandardDeviationThreshold);
-        
-        randomForest.populateDecisionTrees();
-        randomForest.calcForrestErrorRate();
-        randomForest.printConfusionMatrix(intToTreatmentMap);
-        
-        map<string, string> variables; 
-        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "RF.";
-        variables["[distance]"] = lookup[0]->getLabel();
-        string filename = getOutputFileName("summary", variables);
-        outputNames.push_back(filename); outputTypes["summary"].push_back(filename);
-        randomForest.calcForrestVariableImportance(filename);
-        
-        //
-        map<string, string> variable; 
-        variable["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "misclassifications.";
-        variable["[distance]"] = lookup[0]->getLabel();
-        string mc_filename = getOutputFileName("summary", variable);
-        outputNames.push_back(mc_filename); outputTypes["summary"].push_back(mc_filename);
-        randomForest.getMissclassifications(mc_filename, intToTreatmentMap, names);
-        //
-        
-        m->mothurOutEndLine();
-    }
-    catch(exception& e) {
-        m->errorOut(e, "ClassifySharedCommand", "processSharedAndDesignData");
-        exit(1);
-    }
-}
-//**********************************************************************************************************************
-
diff --git a/classifysharedcommand.h b/classifysharedcommand.h
deleted file mode 100755 (executable)
index 7a6907f..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-//
-//  classifysharedcommand.h
-//  Mothur
-//
-//  Created by Abu Zaher Md. Faridee on 8/13/12.
-//  Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-#ifndef __Mothur__classifysharedcommand__
-#define __Mothur__classifysharedcommand__
-
-#include "command.hpp"
-#include "inputdata.h"
-
-class ClassifySharedCommand : public Command {
-public:
-  ClassifySharedCommand();
-  ClassifySharedCommand(string);
-  ~ClassifySharedCommand() {};
-  
-  vector<string> setParameters();
-  string getCommandName()                      { return "classify.shared";     }
-  string getCommandCategory()          { return "OTU-Based Approaches";                }  
-  string getHelpString();      
-  string getOutputPattern(string);
-  string getCitation() { return "http://www.mothur.org/wiki/Classify.shared\n"; }
-  string getDescription()              { return "implements the random forest machine learning algorithm to identify OTUs that can be used to differentiate between various groups of samples"; }
-  int execute();
-  
-  void help() { m->mothurOut(getHelpString()); }
-
-private:
-    bool abort;
-    string outputDir;
-    vector<string> outputNames, Groups;
-  
-    string sharedfile, designfile;
-    set<string> labels;
-    bool allLines;
-  
-    int processors;
-    bool useTiming;
-
-    GroupMap designMap;
-  
-    int numDecisionTrees;
-    string treeSplitCriterion, optimumFeatureSubsetSelectionCriteria;
-    bool doPruning, discardHighErrorTrees;
-    double pruneAggressiveness, highErrorTreeDiscardThreshold, featureStandardDeviationThreshold;
-    
-    void processSharedAndDesignData(vector<SharedRAbundVector*> lookup);
-};
-
-#endif /* defined(__Mothur__classifysharedcommand__) */
index 03c3e49ccff50f128c657780da2b9d8109289358..8ab0a9529ba697aece318c7e11301f9e465b19c3 100644 (file)
@@ -306,7 +306,7 @@ CommandFactory::CommandFactory(){
     commands["make.table"]          = "make.table";
     commands["sff.multiple"]        = "sff.multiple";
        commands["quit"]                                = "MPIEnabled"; 
-    commands["classifyrf.shared"]              = "classifyrf.shared"; 
+    commands["classify.rf"]         = "classify.rf"; 
     commands["filter.shared"]          = "filter.shared"; 
     commands["primer.design"]          = "primer.design";
     commands["get.dists"]           = "get.dists";
@@ -533,7 +533,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.contigs")          {      command = new MakeContigsCommand(optionString);             }
         else if(commandName == "load.logfile")          {      command = new LoadLogfileCommand(optionString);             }
         else if(commandName == "sff.multiple")          {      command = new SffMultipleCommand(optionString);             }
-        else if(commandName == "classifyrf.shared")       {    command = new ClassifyRFSharedCommand(optionString);          }
+        else if(commandName == "classify.rf")           {      command = new ClassifyRFSharedCommand(optionString);          }
         else if(commandName == "filter.shared")         {      command = new FilterSharedCommand(optionString);            }
         else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
@@ -701,7 +701,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "make.contigs")          {      pipecommand = new MakeContigsCommand(optionString);             }
         else if(commandName == "load.logfile")          {      pipecommand = new LoadLogfileCommand(optionString);             }
         else if(commandName == "sff.multiple")          {      pipecommand = new SffMultipleCommand(optionString);             }
-        else if(commandName == "classifyrf.shared")       {    pipecommand = new ClassifyRFSharedCommand(optionString);          }
+        else if(commandName == "classify.rf")           {      pipecommand = new ClassifyRFSharedCommand(optionString);        }
         else if(commandName == "filter.shared")         {      pipecommand = new FilterSharedCommand(optionString);            }
         else if(commandName == "primer.design")         {      pipecommand = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      pipecommand = new GetDistsCommand(optionString);                }
@@ -855,7 +855,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "make.contigs")          {      shellcommand = new MakeContigsCommand();            }
         else if(commandName == "load.logfile")          {      shellcommand = new LoadLogfileCommand();            }
         else if(commandName == "sff.multiple")          {      shellcommand = new SffMultipleCommand();            }
-        else if(commandName == "classifyrf.shared")       {    shellcommand = new ClassifyRFSharedCommand();         }
+        else if(commandName == "classify.rf")           {      shellcommand = new ClassifyRFSharedCommand();       }
         else if(commandName == "filter.shared")         {      shellcommand = new FilterSharedCommand();           }
         else if(commandName == "primer.design")         {      shellcommand = new PrimerDesignCommand();           }
         else if(commandName == "get.dists")             {      shellcommand = new GetDistsCommand();               }
index 805d21e51d5331576185f6d28128af8dad7cbae3..777444bc09749c6aaa7240448e8d777da8c14317 100644 (file)
@@ -15,7 +15,6 @@ vector<string> KruskalWallisCommand::setParameters(){
         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
         CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
         //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -34,9 +33,8 @@ string KruskalWallisCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The kruskal.wallis command allows you to ....\n";
-               helpString += "The kruskal.wallis command parameters are: shared, design, class,  label and classes.\n";
+               helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n";
                helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
-        helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
         helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
                helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
         return helpString;
@@ -159,9 +157,6 @@ KruskalWallisCommand::KruskalWallisCommand(string option)  {
             mclass = validParameter.validFile(parameters, "class", false);
                        if (mclass == "not found") { mclass = ""; }
             
-            classes = validParameter.validFile(parameters, "classes", false);
-                       if (classes == "not found") { classes = ""; }
-            
                }
                
        }
@@ -178,13 +173,6 @@ int KruskalWallisCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
         DesignMap designMap(designfile);
-        //if user set classes set groups=those classes
-        if (classes != "") {
-            map<string, vector<string> > thisClasses = m->parseClasses(classes);
-            vector<string> groups = designMap.getNamesUnique(thisClasses);
-            if (groups.size() != 0) { m->setGroups(groups); }
-            else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
-        }
         
         //if user did not select class use first column
         if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
index a82d208cf15ea424413fdae4eee7c666868f4b35..73ef6434de6d6edcd6449e8824aae2d7f099b289 100644 (file)
@@ -42,7 +42,7 @@ public:
     
 private:
     bool abort, allLines;
-    string outputDir, sharedfile, designfile, mclass, classes;
+    string outputDir, sharedfile, designfile, mclass;
     vector<string> outputNames;
     set<string> labels;
     
index 3575e021fd754b5d4baf897893a0633372aef69f..8d1768c2b1d87bc840de743cc5d1dbdc0801b7da 100644 (file)
@@ -17,7 +17,7 @@ vector<string> LefseCommand::setParameters(){
         CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
         CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+               //CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
         CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
         CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
         
@@ -58,7 +58,7 @@ string LefseCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The lefse command allows you to ....\n";
-               helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n";
+               helpString += "The lefse command parameters are: shared, design, class, subclass, label, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n";
                helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
         helpString += "The subclass parameter is used to indicate the .....If none is provided, second category is used, or if only one category subclass is ignored. \n";
         helpString += "The aalpha parameter is used to set the alpha value for the Krukal Wallis Anova test Default=0.05. \n";
@@ -67,13 +67,13 @@ string LefseCommand::getHelpString(){
         helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n";
         helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n";
         //helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n";
-        helpString += "The curv parameter is used to set whether perform the wilcoxon test ing the Curtis's approach [BETA VERSION] Default=F. \n";
+        helpString += "The curv parameter is used to set whether perform the wilcoxon testing the Curtis's approach [BETA VERSION] Default=F. \n";
         helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n";
         helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n";
         helpString += "The strict parameter is used to set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison. Options = 0,1,2. Default=0. \n";
         helpString += "The minc parameter is used to minimum number of samples per subclass for performing wilcoxon test. Default=10. \n";
         helpString += "The multiclass parameter is used to (for multiclass tasks) set whether the test is performed in a one-against-one ( onevone - more strict!) or in a one-against-all setting ( onevall - less strict). Default=onevall. \n";
-        helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
+        //helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
         helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
                helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
         return helpString;
@@ -199,9 +199,6 @@ LefseCommand::LefseCommand(string option)  {
             subclass = validParameter.validFile(parameters, "subclass", false);
                        if (subclass == "not found") { subclass = mclass; }
             
-            classes = validParameter.validFile(parameters, "classes", false);
-                       if (classes == "not found") { classes = ""; }
-            
             string temp = validParameter.validFile(parameters, "aalpha", false);
                        if (temp == "not found") { temp = "0.05"; }
                        m->mothurConvert(temp, anovaAlpha);
@@ -266,22 +263,16 @@ LefseCommand::LefseCommand(string option)  {
 
 int LefseCommand::execute(){
        try {
+        srand(1982);
         //for reading lefse formatted file and running in mothur for testing - pass number of rows used for design file
         if (false) {  makeShared(1); exit(1); }
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
         DesignMap designMap(designfile);
-        //if user set classes set groups=those classes
-        if (classes != "") {
-            map<string, vector<string> > thisClasses = m->parseClasses(classes);
-            vector<string> groups = designMap.getNamesUnique(thisClasses);
-            if (groups.size() != 0) { m->setGroups(groups); }
-            else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
-        }
         
         //if user did not select class use first column
-        if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
+        if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); if (subclass == "") { subclass = mclass; } }
         
         InputData input(sharedfile, "sharedfile");
         vector<SharedRAbundFloatVector*> lookup = input.getSharedRAbundFloatVectors();
@@ -365,6 +356,7 @@ int LefseCommand::execute(){
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                m->mothurOutEndLine();
+        srand(time(NULL));
         return 0;
                
     }
@@ -383,8 +375,8 @@ int LefseCommand::process(vector<SharedRAbundFloatVector*>& lookup, DesignMap& d
         map<string, set<string> > class2SubClasses; //maps class name to vector of its subclasses
         map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old.  Saves time below.
         map<string, vector<int> > class2GroupIndex; //maps subclass name to vector of indexes in lookup from that class. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old.  Saves time below.
+        if (normMillion) {  normalize(lookup);  }
         for (int j = 0; j < lookup.size(); j++) {
-            if (normMillion) {  normalize(lookup);  }
             string group = lookup[j]->getGroup();
             string treatment = designMap.get(group, mclass); //get value for this group in this category
             string thisSub = designMap.get(group, subclass);
@@ -426,12 +418,16 @@ int LefseCommand::process(vector<SharedRAbundFloatVector*>& lookup, DesignMap& d
         
         int numSigBeforeWilcox = significantOtuLabels.size();
         
+        if (m->debug) { m->mothurOut("[DEBUG]: completed Kruskal Wallis\n"); } 
+        
         //check for subclass
         string wilcoxString = "";
         if ((subclass != "") && wilc) {  significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels, class2SubClasses, subClass2GroupIndex, subclass2Class);  wilcoxString += " ( " + toString(numSigBeforeWilcox) + " ) before internal wilcoxon"; }
         
         int numSigAfterWilcox = significantOtuLabels.size();
         
+        if (m->debug) { m->mothurOut("[DEBUG]: completed Wilcoxon\n"); } 
+        
         m->mothurOut("\nNumber of significantly discriminative features: " + toString(numSigAfterWilcox) + wilcoxString + ".\n"); 
         
         map<int, double> sigOTUSLDA;
@@ -441,6 +437,8 @@ int LefseCommand::process(vector<SharedRAbundFloatVector*>& lookup, DesignMap& d
         }
         else { m->mothurOut("No features with significant differences between the classes.\n"); }
         
+        if (m->debug) { m->mothurOut("[DEBUG]: completed lda\n"); } 
+        
         printResults(means, significantOtuLabels, sigOTUSLDA, lookup[0]->getLabel(), classes);
         
         return 0;
@@ -461,7 +459,9 @@ int LefseCommand::normalize(vector<SharedRAbundFloatVector*>& lookup) {
         }
         
         for (int i = 0; i < lookup.size(); i++) {
-            for (int j = 0; j < lookup[i]->getNumBins(); j++) { lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); }
+            for (int j = 0; j < lookup[i]->getNumBins(); j++) {
+                lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup());
+            }
         }
         
         return 0;
@@ -713,15 +713,22 @@ map<int, double> LefseCommand::testLDA(vector<SharedRAbundFloatVector*>& lookup,
         for (int i = 0; i < numBins; i++) {
             if (m->control_pressed) { break; }
             
+            if (m->debug) { m->mothurOut("[DEBUG]: bin = " + toString(i) + "\n."); }
+            
             it = bins.find(i);
             if (it != bins.end()) { //flagged in Kruskal Wallis and Wilcoxon(if we ran it)
                 
+                if (m->debug) { m->mothurOut("[DEBUG]:flagged bin = " + toString(i) + "\n."); }
+                
                 //fill x with this OTUs abundances
                 vector<double> x;
                 for (int j = 0; j < lookup.size(); j++) {  x.push_back(lookup[j]->getAbundance(i));  } 
                 
                 //go through classes
                 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) {
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: class = " + it->first + "\n."); }
+                    
                     //max(float(feats['class'].count(c))*0.5,4)
                     //max(numGroups in this class*0.5, 4.0)
                     double necessaryNum = ((double)((it->second).size())*0.5);
@@ -760,10 +767,14 @@ map<int, double> LefseCommand::testLDA(vector<SharedRAbundFloatVector*>& lookup,
         minCl = (int)((float)(minCl*fBoots*fBoots*0.05));
         minCl = max(minCl, 1);
         
+        if (m->debug) { m->mothurOut("[DEBUG]: about to start iters. \n."); }
+        
         vector< vector< vector<double> > > results;//[iters][numComparison][numOTUs]
         for (int j = 0; j < iters; j++) {
             if (m->control_pressed) { return sigOTUS; }
             
+            if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(j) + "\n."); }
+            
             //find "good" random vector
             vector<int> rand_s;
             for (int h = 0; h < 1000; h++) { //generate a vector of length fractionNumGroups with range 0 to numGroups-1
index 1c5ac6f0c251e588986e7f0efa1e027026e88f20..cb1bbb6da80f7da1567979dd158fb5861e74a685 100644 (file)
@@ -41,7 +41,7 @@ public:
     
     string getOutputPattern(string);
        string getHelpString();
-    string getCitation() { return "http://www.mothur.org/wiki/Lefse"; }
+    string getCitation() { return "Segata, N., J. Izard, L. Waldron, D. Gevers, L. Miropolsky, W. S. Garrett, and C. Huttenhower. 2011. Metagenomic biomarker discovery and explanation. Genome Biol 12:R60, http://www.mothur.org/wiki/Lefse"; }
     string getDescription()            { return "brief description"; }
     
     int execute();
@@ -49,7 +49,7 @@ public:
     
 private:
     bool abort, allLines, wilc, wilcsamename, curv, subject, normMillion;
-    string outputDir, sharedfile, designfile, mclass, subclass, classes, rankTec, multiClassStrat;
+    string outputDir, sharedfile, designfile, mclass, subclass, rankTec, multiClassStrat;
     vector<string> outputNames;
     set<string> labels;
     double anovaAlpha, wilcoxonAlpha, fBoots, ldaThreshold;
index fdd95ec9602cb3b44cf3e1aa54934fc3045982c8..b208aab042b6be5c0c096531651b9067976d8f45 100644 (file)
@@ -10,6 +10,8 @@
 #include "linearalgebra.h"
 #include "wilcox.h"
 
+#define PI 3.1415926535897932384626433832795
+
 // This class references functions used from "Numerical Recipes in C++" //
 
 /*********************************************************************************************************************************/
@@ -1320,24 +1322,14 @@ double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pV
        }
 }
 /*********************************************************************************************************************************/
-//python random.normalvariate - thanks http://madssj.com/blog/2008/05/07/porting-normalvariate-from-python-to-c/
-double LinearAlgebra::normalvariate(double mu, double sigma) {
+double LinearAlgebra::normalvariate(double mean, double standardDeviation) {
     try {
-        double NV_MAGICCONST = 1.7155277699214135; /* (4 * exp(-0.5) / sqrt(2.0)); */
-        unsigned long int MAX_RANDOM = 2147483647; /* (2 ** 31) - 1; */
-        
-        double u1, u2, z, zz;
-        for (;;) {
-            if (m->control_pressed) { break; }
-            u1 = ((float)random()) / MAX_RANDOM;
-            u2 = 1.0 - (((float)random()) / MAX_RANDOM);
-            z = NV_MAGICCONST * (u1 - 0.5) / u2;
-            zz = z * z / 4.0;
-            if (zz <= -(log(u2))) {
-                break;
-            }
-        }
-        return mu + z * sigma;
+        double u1 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0);
+        double u2 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0);
+        //double r = sqrt( -2.0*log(u1) );
+        //double theta = 2.0*PI*u2;
+        //cout << cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)) << endl;
+        return cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1));
     }
        catch(exception& e) {
                m->errorOut(e, "LinearAlgebra", "normalvariate");
@@ -1367,6 +1359,7 @@ double LinearAlgebra::pnorm(double x){
         double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x);
         
         return 0.5*(1.0 + sign*y);
+        
     }
        catch(exception& e) {
                m->errorOut(e, "LinearAlgebra", "pnorm");
index a1018833e6268b6d0fe04aa25d4598f37df9cb4d..7cf8295fb48e02b0dce58b32432d39c736695259 100644 (file)
 vector<string> MakeBiomCommand::setParameters(){       
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared);
-        CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
+        CommandParameter pcontaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
+        //CommandParameter preference("referencetax", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preference);
         CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        //CommandParameter ppicrust("picrust", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppicrust);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype);
 
@@ -114,12 +116,14 @@ vector<string> MakeBiomCommand::setParameters(){
 string MakeBiomCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
+               helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n"; //, picrust and referencetax
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
                helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
                helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
         helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
+        //helpString += "The picrust parameter is used to indicate the biom file is for input to picrust.  NOTE: Picrust requires a greengenes taxonomy. \n";
+        //helpString += "The referencetax parameter is used with the picrust parameter.  Picrust requires the name of the reference taxonomy sequence to be in the biom file. \n";
                helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
                helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
                helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
@@ -202,12 +206,20 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
                 
-                it = parameters.find("contaxonomy");
+                it = parameters.find("constaxonomy");
                                //user has given a template file
                                if(it != parameters.end()){ 
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
+                                       if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("referencetax");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["referencetax"] = inputDir + it->second;             }
                                }
                 
                 it = parameters.find("metadata");
@@ -233,9 +245,13 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
             
-            contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
+            contaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
                        if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
                        else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
+            
+            //referenceTax = validParameter.validFile(parameters, "referencetax", true);
+                       //if (referenceTax == "not found") {  referenceTax = "";  }
+                       //else if (referenceTax == "not open") { referenceTax = ""; abort = true; }
 
             metadatafile = validParameter.validFile(parameters, "metadata", true);
                        if (metadatafile == "not found") {  metadatafile = "";  }
@@ -249,6 +265,13 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                                if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
+            
+            //string temp = validParameter.validFile(parameters, "picrust", false);                    if (temp == "not found"){       temp = "f";                             }
+                       //picrust = m->isTrue(temp);
+            //if (picrust && ((contaxonomyfile == "") || (referenceTax == ""))) {
+                //m->mothurOut("[ERROR]: the picrust parameter requires a consensus taxonomy with greengenes taxonomy the reference."); m->mothurOutEndLine(); abort = true;
+           //}
+            picrust=false;
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = ""; }
@@ -406,7 +429,8 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
         
         int numBins = lookup[0]->getNumBins();
-        vector<string> metadata = getMetaData(lookup);  
+        vector<string> picrustLabels;
+        vector<string> metadata = getMetaData(lookup, picrustLabels);
         
         if (m->control_pressed) {  out.close(); return 0; }
         
@@ -423,10 +447,11 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
         string rowBack = "\", \"metadata\":";
         for (int i = 0; i < numBins-1; i++) {
             if (m->control_pressed) {  out.close(); return 0; }
-            out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n";
+            if (!picrust) { out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n"; }
+            else {  out << rowFront << picrustLabels[i] << rowBack << metadata[i] << "},\n"; }
         }
-        out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n";
-        
+        if (!picrust) {  out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
+        else {  out << rowFront << picrustLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
         //get column info
         /*"columns": [
                     {"id":"Sample1", "metadata":null},
@@ -446,7 +471,7 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
         out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n";
         
         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
-        out <<  spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n";
+        out <<  spaces + "\"shape\": [" << numBins << "," << lookup.size() << "],\n";
         out << spaces + "\"data\":  [";
         
         vector<string> dataRows;
@@ -518,7 +543,7 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
        }
 }
 //**********************************************************************************************************************
-vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
+vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup, vector<string>& picrustLabels){
        try {
         vector<string> metadata;
         
@@ -581,7 +606,7 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
             //traverse the binLabels forming the metadata strings and saving them
             //make sure to sanity check
             map<string, string>::iterator it;
-            for (int i = 0; i < m->currentBinLabels.size(); i++) {
+            for (int i = 0; i < lookup[0]->getNumBins(); i++) {
                 
                 if (m->control_pressed) { return metadata; }
                 
@@ -589,6 +614,10 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
                 
                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
                 else {
+                    if (picrust) {
+                        string temp = it->second; m->removeConfidences(temp);
+                        picrustLabels.push_back(temp);
+                    }
                     vector<string> bootstrapValues;
                     string data = "{\"taxonomy\":[";
             
index 7ea381f145af343213a25bd32109d20fa1fb1dee..1ff398518c850c0447ff58c904b1ceddf9b6c3d3 100644 (file)
@@ -36,14 +36,14 @@ public:
        
 private:
     
-       string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label;
+       string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label, referenceTax;
        vector<string> outputNames, Groups, sampleMetadata;
        set<string> labels;
     
-       bool abort, allLines;
+       bool abort, allLines, picrust;
     
     int getBiom(vector<SharedRAbundVector*>&);
-    vector<string> getMetaData(vector<SharedRAbundVector*>&);
+    vector<string> getMetaData(vector<SharedRAbundVector*>&, vector<string>&);
     vector<string> parseTax(string tax, vector<string>& scores);
     int getSampleMetaData(vector<SharedRAbundVector*>&);
 };
index 04de53382601600c77590d109e065819f4f80a98..7fc8608be4176f7e8360ea58ee36412e074bdae5 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"8/19/2013\""
-VERSION = "\"1.31.2\""
+RELEASE_DATE = "\"10/01/2013\""
+VERSION = "\"1.32.0\""
 FORTAN_COMPILER = gfortran
 FORTRAN_FLAGS = 
 
index 9f895f60d42b9bc6bc1527f6826bfd6fd0d23767..73f9db202a0171bd49623ab685f88f9b38593dc5 100644 (file)
@@ -43,8 +43,8 @@ string MakeLefseCommand::getHelpString(){
         helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
         helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
                helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
-               helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n";
-               helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n";
+               helpString += "The make.lefse command should be in the following format: make.lefse(shared=yourSharedFile)\n";
+               helpString += "make.lefse(shared=final.an.shared)\n";
                return helpString;
        }
        catch(exception& e) {
index 449454b1e932eb3c0f23f543bc6bd6c07b01f7ae..27999d074c103144ae9fcf240e393d17c29285f7 100644 (file)
@@ -15,7 +15,7 @@ vector<string> RenameSeqsCommand::setParameters(){
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,true,true); parameters.push_back(pgroup);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
index e7b2c6acedf523f8249c8159ed186a6b8d5b8892..e40c41218f5a599d894e0b2cb52f54406635f9ee 100644 (file)
@@ -2177,19 +2177,18 @@ int ScreenSeqsCommand::screenCountFile(map<string, string> badSeqNames){
                        if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
             
                        in >> name; m->gobble(in); 
-            in >> thisTotal; m->gobble(in);
+            in >> thisTotal; 
             rest = m->getline(in); m->gobble(in);
             
                        it = badSeqNames.find(name);
                        
                        if(it != badSeqNames.end()){
-                               badSeqNames.erase(it);
+                               badSeqNames.erase(it); 
                        }
                        else{
                                goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
                        }
                }
-               
                if (m->control_pressed) { goodCountOut.close();  in.close(); m->mothurRemove(goodCountFile);  return 0; }
         
                //we were unable to remove some of the bad sequences
index 41c8e743122399563b8367fda6a9d4a94f9bda86..329fd1fdbeb28ba673c883eeb6ea36fe059c64fc 100644 (file)
@@ -91,7 +91,7 @@ int SetLogFileCommand::execute(){
         if (directory == "") {
             commandFactory->setLogfileName(name, append);
         }else if (m->dirCheck(directory)) {
-             commandFactory->setLogfileName(name, append);
+            commandFactory->setLogfileName(name, append);
         }
                
                return 0;
index 9a1cfbb5a1567153628a1213a494c36242e1d763..237821a7d9d603c24aafbe44aca2fcdd5d668232 100644 (file)
@@ -34,12 +34,18 @@ EstOutput Jest::getValues(vector<SharedRAbundVector*> shared) {
                
                *chaoS1Sabund = shared[0]->getSAbundVector();
                *chaoS2Sabund = shared[1]->getSAbundVector();
+        
+        //chaoS1Sabund->print(cout);
+        //chaoS2Sabund->print(cout);
                
                S12 = sharedChao->getValues(shared);
                S1 = chaoS1->getValues(chaoS1Sabund);
                S2 = chaoS2->getValues(chaoS2Sabund);
+        
+        //cout << S12[0] << '\t' << S1[0] << '\t' << S2[0] << endl;
                
                data[0] = 1.0 - S12[0] / (float)(S1[0] + S2[0] - S12[0]);
+        //cout << data[0] << endl;
                
                if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }