]> git.donarmstrong.com Git - mothur.git/commitdiff
added indicator command
authorwestcott <westcott>
Fri, 19 Nov 2010 13:12:27 +0000 (13:12 +0000)
committerwestcott <westcott>
Fri, 19 Nov 2010 13:12:27 +0000 (13:12 +0000)
16 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
consensuscommand.cpp
distancecommand.cpp
getlineagecommand.cpp
indicatorcommand.cpp [new file with mode: 0644]
indicatorcommand.h [new file with mode: 0644]
listvector.cpp
makefile
metastats2.c
mothur
mothur.h
mothurout.cpp
removelineagecommand.cpp
tree.cpp
tree.h

index 369bae98e626911a3afac441c728b2c469992e85..7d9b3154b7fbedce9e5019600c78d8eded165643 100644 (file)
@@ -77,6 +77,8 @@
                A76C4A1111876BAF0009460B /* setlogfilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setlogfilecommand.cpp; sourceTree = SOURCE_ROOT; };
                A76D1451128D6A03005D4DFE /* removeotuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeotuscommand.h; sourceTree = "<group>"; };
                A76D1452128D6A03005D4DFE /* removeotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeotuscommand.cpp; sourceTree = "<group>"; };
+               A76D1473128D9DA2005D4DFE /* indicatorcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = indicatorcommand.h; sourceTree = "<group>"; };
+               A76D1474128D9DA2005D4DFE /* indicatorcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = indicatorcommand.cpp; sourceTree = "<group>"; };
                A77D787B126F387700F351BD /* pairwiseseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pairwiseseqscommand.h; sourceTree = "<group>"; };
                A77D787C126F387700F351BD /* pairwiseseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pairwiseseqscommand.cpp; sourceTree = "<group>"; };
                A780E6CB11E7745D00BB5718 /* endiannessmacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = endiannessmacros.h; sourceTree = "<group>"; };
                                A7DA207C113FECD400BF472F /* heatmapsimcommand.cpp */,
                                A7DA207F113FECD400BF472F /* helpcommand.h */,
                                A7DA207E113FECD400BF472F /* helpcommand.cpp */,
+                               A76D1473128D9DA2005D4DFE /* indicatorcommand.h */,
+                               A76D1474128D9DA2005D4DFE /* indicatorcommand.cpp */,
                                A7DA208E113FECD400BF472F /* libshuffcommand.h */,
                                A7DA208D113FECD400BF472F /* libshuffcommand.cpp */,
                                A7DA2090113FECD400BF472F /* listseqscommand.h */,
index 8d038e0c08e6010d21188808b3ff3ad40087a7f7..25d6cfe14e183ce072cb656322676babfdaa094f 100644 (file)
 #include "getgroupscommand.h"
 #include "getotuscommand.h"
 #include "removeotuscommand.h"
+#include "indicatorcommand.h"
 
 /*******************************************************/
 
@@ -206,6 +207,7 @@ CommandFactory::CommandFactory(){
        commands["get.groups"]                  = "get.groups";
        commands["get.otus"]                    = "get.otus";
        commands["remove.otus"]                 = "remove.otus";
+       commands["indicator"]                   = "indicator";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -358,6 +360,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "pairwise.seqs")                 {       command = new PairwiseSeqsCommand(optionString);                        }
                else if(commandName == "cluster.classic")               {       command = new ClusterDoturCommand(optionString);                        }
                else if(commandName == "sub.sample")                    {       command = new SubSampleCommand(optionString);                           }
+               else if(commandName == "indicator")                             {       command = new IndicatorCommand(optionString);                           }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -477,6 +480,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "pairwise.seqs")                 {       pipecommand = new PairwiseSeqsCommand(optionString);                    }
                else if(commandName == "cluster.classic")               {       pipecommand = new ClusterDoturCommand(optionString);                    }
                else if(commandName == "sub.sample")                    {       pipecommand = new SubSampleCommand(optionString);                               }
+               else if(commandName == "indicator")                             {       pipecommand = new IndicatorCommand(optionString);                               }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -584,6 +588,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "pairwise.seqs")                 {       shellcommand = new PairwiseSeqsCommand();                       }
                else if(commandName == "cluster.classic")               {       shellcommand = new ClusterDoturCommand();                       }
                else if(commandName == "sub.sample")                    {       shellcommand = new SubSampleCommand();                          }
+               else if(commandName == "indicator")                             {       shellcommand = new IndicatorCommand();                          }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 0671790a2152801536e3f1bf05335a86f86fca13..9b72638662e4001963b9219f4c5f313010774797 100644 (file)
@@ -206,7 +206,7 @@ int ConcensusCommand::execute(){
                outputFile = filename + ".cons.tre";  outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
                m->openOutputFile(outputFile, out);
                
-               consensusTree->printForBoot(out);
+               consensusTree->print(out, "boot");
                
                out.close(); out2.close();
                
index cf7939ca233ff2a150e1c7589baed81d151d54d7..4cb98419fd23f540e174b068dd9a832b79fb3036 100644 (file)
@@ -49,7 +49,7 @@ vector<string> DistanceCommand::getRequiredParameters(){
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ChopSeqsCommand", "getRequiredParameters");
+               m->errorOut(e, "DistanceCommand", "getRequiredParameters");
                exit(1);
        }
 }
index eaa8d40f3d6b0ec28eb59f107ac16ba9aa0fc61e..4948a1a095343748dde3f467ff59c9ce5e050702 100644 (file)
@@ -182,7 +182,11 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        else if (taxfile == "not found") {  taxfile = ""; m->mothurOut("The taxonomy parameter is required for the get.lineage command."); m->mothurOutEndLine();  abort = true; }
                        
                        string usedDups = "true";
-                       string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "false"; usedDups = ""; }
+                       string temp = validParameter.validFile(parameters, "dups", false);      
+                       if (temp == "not found") { 
+                               if (namefile != "") {  temp = "true";                                   }
+                               else                            {  temp = "false"; usedDups = "";       }
+                       }
                        dups = m->isTrue(temp);
                        
                        taxons = validParameter.validFile(parameters, "taxon", false);  
diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp
new file mode 100644 (file)
index 0000000..7f6dc21
--- /dev/null
@@ -0,0 +1,796 @@
+/*
+ *  indicatorcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "indicatorcommand.h"
+//**********************************************************************************************************************
+vector<string> IndicatorCommand::getValidParameters(){ 
+       try {
+               string Array[] =  {"tree","shared","relabund","label","groups","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> IndicatorCommand::getRequiredParameters(){      
+       try {
+               string Array[] =  {"label","tree"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+IndicatorCommand::IndicatorCommand(){  
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["tree"] = tempOutNames;
+               outputTypes["summary"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "IndicatorCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+vector<string> IndicatorCommand::getRequiredFiles(){   
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+IndicatorCommand::IndicatorCommand(string option)  {
+       try {
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"tree","shared","relabund","groups","label","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       globaldata->newRead();
+                       
+                       vector<string> tempOutNames;
+                       outputTypes["tree"] = tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("tree");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["tree"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("relabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
+                               }
+                               
+                       }
+                       
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+
+                       //check for required parameters
+                       treefile = validParameter.validFile(parameters, "tree", true);
+                       if (treefile == "not open") { abort = true; }
+                       else if (treefile == "not found") { treefile = ""; m->mothurOut("tree is a required parameter for the indicator command."); m->mothurOutEndLine(); abort = true;  }     
+                       else {  globaldata->setTreeFile(treefile);  globaldata->setFormat("tree");      }
+                       
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { abort = true; }
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       else { inputFileName = sharedfile; }
+                       
+                       relabundfile = validParameter.validFile(parameters, "relabund", true);
+                       if (relabundfile == "not open") { abort = true; }
+                       else if (relabundfile == "not found") { relabundfile = ""; }
+                       else { inputFileName = relabundfile; }
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; pickedGroups = false; }
+                       else { 
+                               pickedGroups = true;
+                               m->splitAtDash(groups, Groups);
+                               globaldata->Groups = Groups;
+                       }                       
+                       
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
+                       
+                       if ((relabundfile == "") && (sharedfile == "")) { m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true;  }
+                       
+                       if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true;  }
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "IndicatorCommand");         
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void IndicatorCommand::help(){
+       try {
+               /*m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n");
+               m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n");
+               m->mothurOut("The read.tree command parameters are tree, group and name.\n");
+               m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n");
+               m->mothurOut("The tree and group parameters are both required, if no group file is given then one group is assumed.\n");
+               m->mothurOut("The name parameter allows you to enter a namefile.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); */
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "help");     
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+IndicatorCommand::~IndicatorCommand(){}
+
+//**********************************************************************************************************************
+
+int IndicatorCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+       
+               /***************************************************/
+               // use smart distancing to get right sharedRabund  //
+               /***************************************************/
+               if (sharedfile != "") {  
+                       getShared(); 
+                       if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
+                       if (lookup[0] == NULL) { m->mothurOut("[ERROR] reading shared file."); m->mothurOutEndLine(); return 0; }
+               }else { 
+                       getSharedFloat(); 
+                       if (m->control_pressed) {  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } return 0; }
+                       if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; }
+               }
+                       
+               /***************************************************/
+               //    reading tree info                                                    //
+               /***************************************************/
+               string groupfile = ""; 
+               Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
+        
+               globaldata->setGroupFile(groupfile); 
+               treeMap = new TreeMap();
+               bool mismatch = false;
+               for (int i = 0; i < globaldata->Treenames.size(); i++) { 
+                       //sanity check - is this a group that is not in the sharedfile?
+                       if (!(m->inUsersGroups(globaldata->Treenames[i], globaldata->gGroupmap->namesOfGroups))) {
+                               m->mothurOut("[ERROR]: " + globaldata->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
+                               mismatch = true;
+                       }
+                       treeMap->addSeq(globaldata->Treenames[i], "Group1"); 
+               }
+                               
+               if (mismatch) { //cleanup and exit
+                       if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
+                       else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
+                       delete treeMap;
+                       return 0;
+               }
+                       
+               globaldata->gTreemap = treeMap;
+        
+               read = new ReadNewickTree(treefile);
+               int readOk = read->read(); 
+               
+               if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); globaldata->gTree.clear(); delete globaldata->gTreemap; delete read; return 0; }
+               
+               vector<Tree*> T = globaldata->gTree;
+               
+               delete read;
+               
+               if (m->control_pressed) {  
+                       if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
+                       else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
+                       for (int i = 0; i < T.size(); i++) {  delete T[i];  } globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; 
+               }
+                       
+               T[0]->assembleTree();
+                               
+               /***************************************************/
+               //    create ouptut tree - respecting pickedGroups //
+               /***************************************************/
+               Tree* outputTree = new Tree(globaldata->Groups.size()); 
+               
+               if (pickedGroups) {
+                       outputTree->getSubTree(T[0], globaldata->Groups);
+                       outputTree->assembleTree();
+               }else{
+                       outputTree->getCopy(T[0]);
+                       outputTree->assembleTree();
+               }
+               
+               //no longer need original tree, we have output tree to use and label
+               for (int i = 0; i < T.size(); i++) {  delete T[i];  } globaldata->gTree.clear();
+               
+               if (m->control_pressed) {  
+                       if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
+                       else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
+                       delete outputTree; delete globaldata->gTreemap;  return 0; 
+               }
+               
+               /***************************************************/
+               //              get indicator species values                       //
+               /***************************************************/
+               GetIndicatorSpecies(outputTree);
+               
+               if (m->control_pressed) {  
+                       if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
+                       else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
+                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
+                       delete outputTree; delete globaldata->gTreemap;  return 0; 
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "execute");  
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//traverse tree finding indicator species values for each otu at each node
+//label node with otu number that has highest indicator value
+//report all otu values to file
+int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(inputFileName);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
+               outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               out << "Node\tOTU#\tIndVal" << endl;
+               
+               string treeOutputDir = outputDir;
+               if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
+               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre";
+               
+               
+               //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need
+               map<int, set<string> > nodeToDescendants;
+               map<int, set<int> > descendantNodes;
+               for (int i = 0; i < T->getNumNodes(); i++) {
+                       if (m->control_pressed) { return 0; }
+                       
+                       nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants, descendantNodes);
+               }
+               
+               //you need the distances to leaf to decide grouping below
+               //this will also set branch lengths if the tree does not include them
+               map<int, float> distToLeaf = getLengthToLeaf(T);
+                       
+               //for each node
+               for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
+                               
+                       if (m->control_pressed) { out.close(); return 0; }
+                       
+                       /*****************************************************/
+                       //create vectors containing rabund info                          //
+                       /*****************************************************/
+                       
+                       vector<float> indicatorValues; //size of numBins
+                       
+                       if (sharedfile != "") {
+                               vector< vector<SharedRAbundVector*> > groupings;
+                               
+                               /*groupings.resize(1);
+                               groupings[0].push_back(lookup[0]);
+                               groupings[0].push_back(lookup[1]);
+                               groupings[0].push_back(lookup[2]);
+                               groupings[0].push_back(lookup[3]);
+                               groupings[0].push_back(lookup[4]);*/
+                               
+                               //get nodes that will be a valid grouping
+                               //you are valid if you are not one of my descendants
+                               //AND your distToLeaf is <= mine
+                               //AND your distToLeaf is >= my smallest childs
+                               //AND you were not added as part of a larger grouping
+                               set<string> groupsAlreadyAdded;
+                               for (int j = (T->getNumNodes()-1); j >= 0; j--) {
+                                       if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) {
+                                               vector<SharedRAbundVector*> subset;
+                                               int count = 0;
+                                               int doneCount = nodeToDescendants[j].size();
+                                               for (int k = 0; k < lookup.size(); k++) {
+                                                       //is this descendant of j, and we didn't already add this as part of a larger grouping
+                                                       if ((nodeToDescendants[j].count(lookup[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0)) { 
+                                                               subset.push_back(lookup[k]);
+                                                               groupsAlreadyAdded.insert(lookup[k]->getGroup());
+                                                               count++;
+                                                       }
+                                                       if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
+                                               }
+                                               
+                                               //if subset.size == 0 then the node was added as part of a larger grouping
+                                               if (subset.size() != 0) { groupings.push_back(subset); }
+                                       }
+                               }
+                               
+                               if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
+                               
+                               indicatorValues = getValues(groupings);
+                               
+                       }else {
+                               vector< vector<SharedRAbundFloatVector*> > groupings;
+                               
+                               //get nodes that will be a valid grouping
+                               //you are valid if you are not one of my descendants
+                               //AND your distToLeaf is <= mine
+                               //AND your distToLeaf is >= my smallest childs
+                               //AND you were not added as part of a larger grouping
+                               set<string> groupsAlreadyAdded;
+                               for (int j = (T->getNumNodes()-1); j >= 0; j--) {
+                                       if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) {
+                                               vector<SharedRAbundFloatVector*> subset;
+                                               int count = 0;
+                                               int doneCount = nodeToDescendants[j].size();
+                                               for (int k = 0; k < lookupFloat.size(); k++) {
+                                                       //is this descendant of j, and we didn't already add this as part of a larger grouping
+                                                       if ((nodeToDescendants[j].count(lookupFloat[k]->getGroup()) != 0) && (groupsAlreadyAdded.count(lookupFloat[k]->getGroup()) == 0)) { 
+                                                               subset.push_back(lookupFloat[k]);
+                                                               groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
+                                                               count++;
+                                                       }
+                                                       if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
+                                               }
+                                               
+                                               //if subset.size == 0 then the node was added as part of a larger grouping
+                                               if (subset.size() != 0) { groupings.push_back(subset); }
+                                       }
+                               }
+                               
+                               if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
+                               
+                               indicatorValues = getValues(groupings);
+                       }
+                       
+                       if (m->control_pressed) { out.close(); return 0; }
+                       
+                       /******************************************************/
+                       //output indicator values to table form + label tree  //
+                       /*****************************************************/
+                       vector<int> indicatorOTUs;
+                       float largestValue = 0.0;
+                       for (int j = 0; j < indicatorValues.size(); j++) {
+                               
+                               if (m->control_pressed) { out.close(); return 0; }
+                               
+                               out << (i+1) << '\t' << (j+1) << '\t' << indicatorValues[j] << endl;
+                               
+                               //show no favortism
+                               if (indicatorValues[j] > largestValue) { 
+                                       largestValue = indicatorValues[j];
+                                       indicatorOTUs.clear();
+                                       indicatorOTUs.push_back(j+1);
+                               }else if (indicatorValues[j] == largestValue) { 
+                                       indicatorOTUs.push_back(j+1);
+                               }
+                               
+                               random_shuffle(indicatorOTUs.begin(), indicatorOTUs.end());
+                               
+                               T->tree[i].setLabel(indicatorOTUs[0]);
+                       }
+                       
+               }
+               out.close();
+       
+               ofstream outTree;
+               m->openOutputFile(outputTreeFileName, outTree);
+               outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
+               
+               T->print(outTree, "both");
+               outTree.close();
+       
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies");      
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundFloatVector*> >& groupings){
+       try {
+               vector<float> values;
+               
+               //for each otu
+               for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
+                       
+                       if (m->control_pressed) { return values; }
+                       
+                       vector<float> terms; 
+                       float AijDenominator = 0.0;
+                       vector<float> Bij;
+                       //get overall abundance of each grouping
+                       for (int j = 0; j < groupings.size(); j++) {
+                               
+                               float totalAbund = 0;
+                               int numNotZero = 0;
+                               for (int k = 0; k < groupings[j].size(); k++) { 
+                                       totalAbund += groupings[j][k]->getAbundance(i); 
+                                       if (groupings[j][k]->getAbundance(i) != 0) { numNotZero++; }
+                               }
+                               
+                               float Aij = (totalAbund / (float) groupings[j].size());
+                               terms.push_back(Aij);
+                               
+                               //percentage of sites represented
+                               Bij.push_back(numNotZero / (float) groupings[j].size());
+                               
+                               AijDenominator += Aij;
+                       }
+                       
+                       float maxIndVal = 0.0;
+                       for (int j = 0; j < terms.size(); j++) { 
+                               float thisAij = (terms[j] / AijDenominator); 
+                               float thisValue = thisAij * Bij[j] * 100.0;
+                               
+                               //save largest
+                               if (thisValue > maxIndVal) { maxIndVal = thisValue; }
+                       }
+                       
+                       values.push_back(maxIndVal);
+               }
+               
+               return values;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getValues");        
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//same as above, just data type difference
+vector<float> IndicatorCommand::getValues(vector< vector<SharedRAbundVector*> >& groupings){
+       try {
+               vector<float> values;
+               
+       /*for (int j = 0; j < groupings.size(); j++) {          
+               cout << "grouping " << j << endl;
+               for (int k = 0; k < groupings[j].size(); k++) { 
+                       cout << groupings[j][k]->getGroup() << endl;
+               }
+       }*/
+               //for each otu
+               for (int i = 0; i < groupings[0][0]->getNumBins(); i++) {
+                       vector<float> terms; 
+                       float AijDenominator = 0.0;
+                       vector<float> Bij;
+                       //get overall abundance of each grouping
+                       for (int j = 0; j < groupings.size(); j++) {
+       
+                               int totalAbund = 0.0;
+                               int numNotZero = 0;
+                               for (int k = 0; k < groupings[j].size(); k++) { 
+                                       totalAbund += groupings[j][k]->getAbundance(i); 
+                                       if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
+                                       
+                               }
+                                       
+                               float Aij = (totalAbund / (float) groupings[j].size());
+                               terms.push_back(Aij);
+                               
+                               //percentage of sites represented
+                               Bij.push_back(numNotZero / (float) groupings[j].size());
+                               
+                               AijDenominator += Aij;
+                       }
+                       
+                       float maxIndVal = 0.0;
+                       for (int j = 0; j < terms.size(); j++) { 
+                               float thisAij = (terms[j] / AijDenominator); 
+                               float thisValue = thisAij * Bij[j] * 100.0;
+                                       
+                               //save largest
+                               if (thisValue > maxIndVal) { maxIndVal = thisValue; }
+                       }
+                       
+                       values.push_back(maxIndVal);
+               }
+               
+               return values;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getValues");        
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//you need the distances to leaf to decide groupings
+//this will also set branch lengths if the tree does not include them
+map<int, float> IndicatorCommand::getLengthToLeaf(Tree*& T){
+       try {
+               map<int, float> dists;
+               
+               for (int i = 0; i < T->getNumNodes(); i++) {
+                       
+                       int lc = T->tree[i].getLChild();
+                       int rc = T->tree[i].getRChild();
+                                
+                       //if you have no branch length, set it then calc
+                       if (T->tree[i].getBranchLength() <= 0.0) {
+                               
+                               if (lc == -1) { // you are a leaf
+                                       //if you are a leaf set you priliminary length to 1.0, this may adjust later
+                                       T->tree[i].setBranchLength(1.0);
+                                       dists[i] = 0.0;
+                               }else{ // you are an internal node
+                                       //look at your children's length to leaf
+                                       float ldist = dists[lc];
+                                       float rdist = dists[rc];
+                                       
+                                       float greater;
+                                       if (rdist > greater) { greater = rdist; }
+                                       else { greater = ldist; }
+                                       
+                                       //branch length = difference + 1
+                                       T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0));
+                                       T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0));
+                                       
+                                       dists[i] = dists[lc] + (abs(ldist-greater) + 1.0);
+                               }
+                               
+                       }else{
+                               if (lc == -1) { dists[i] = 0.0; }
+                               else { dists[i] = dists[lc] + T->tree[lc].getBranchLength(); }
+                       }
+                       
+               }
+               
+               return dists;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getLengthToLeaf");  
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+set<string> IndicatorCommand::getDescendantList(Tree*& T, int i, map<int, set<string> > descendants, map<int, set<int> >& nodes){
+       try {
+               set<string> names;
+               
+               set<string>::iterator it;
+               
+               int lc = T->tree[i].getLChild();
+               int rc = T->tree[i].getRChild();
+               
+               if (lc == -1) { //you are a leaf your only descendant is yourself
+                       set<int> temp; temp.insert(i);
+                       names.insert(T->tree[i].getName());
+                       nodes[i] = temp;
+               }else{ //your descedants are the combination of your childrens descendants
+                       names = descendants[lc];
+                       nodes[i] = nodes[lc];
+                       for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) {
+                               names.insert(*it);
+                       }
+                       for (set<int>::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) {
+                               nodes[i].insert(*itNum);
+                       }
+               }
+               
+               return names;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getDescendantList");        
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int IndicatorCommand::getShared(){
+       try {
+               InputData* input = new InputData(sharedfile, "sharedfile");
+               lookup = input->getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookup[0] != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  delete input; return 0;  }
+                       
+                       if(labels.count(lookup[0]->getLabel()) == 1){
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
+                               
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                               lookup = input->getSharedRAbundVectors(lastLabel);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();                      
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                       lookup = input->getSharedRAbundVectors();
+               }
+               
+               
+               if (m->control_pressed) { delete input; return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
+                       lookup = input->getSharedRAbundVectors(lastLabel);
+               }       
+               
+               delete input;
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getShared");        
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int IndicatorCommand::getSharedFloat(){
+       try {
+               InputData* input = new InputData(relabundfile, "relabund");
+               lookupFloat = input->getSharedRAbundFloatVectors();
+               string lastLabel = lookupFloat[0]->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
+                       
+                       if (m->control_pressed) {  delete input; return 0;  }
+                       
+                       if(labels.count(lookupFloat[0]->getLabel()) == 1){
+                               processedLabels.insert(lookupFloat[0]->getLabel());
+                               userLabels.erase(lookupFloat[0]->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookupFloat[0]->getLabel();
+                               
+                               for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
+                               lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
+                               
+                               processedLabels.insert(lookupFloat[0]->getLabel());
+                               userLabels.erase(lookupFloat[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookupFloat[0]->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = lookupFloat[0]->getLabel();                 
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } 
+                       lookupFloat = input->getSharedRAbundFloatVectors();
+               }
+               
+               
+               if (m->control_pressed) { delete input; return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } 
+                       lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
+               }       
+               
+               delete input;
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getShared");        
+       exit(1);
+       }
+}              
+/*****************************************************************/
+
+
diff --git a/indicatorcommand.h b/indicatorcommand.h
new file mode 100644 (file)
index 0000000..0d924fd
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef INDICATORCOMMAND_H
+#define INDICATORCOMMAND_H
+
+/*
+ *  indicatorcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "readtree.h"
+#include "treemap.h"
+#include "globaldata.hpp"
+#include "sharedrabundvector.h"
+#include "sharedrabundfloatvector.h"
+#include "inputdata.h"
+
+class IndicatorCommand : public Command {
+public:
+       IndicatorCommand(string);
+       IndicatorCommand();
+       ~IndicatorCommand();
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();
+       void help();
+       
+private:
+       GlobalData* globaldata;
+       ReadTree* read;
+       TreeMap* treeMap;
+       string treefile, sharedfile, relabundfile, groups, label, inputFileName, outputDir;
+       bool abort, pickedGroups;
+       vector<string> outputNames, Groups;
+       map<string, vector<string> > outputTypes;
+       vector<SharedRAbundVector*> lookup;
+       vector<SharedRAbundFloatVector*> lookupFloat;
+       
+       int getShared();
+       int getSharedFloat();
+       int GetIndicatorSpecies(Tree*&);
+       set<string> getDescendantList(Tree*&, int, map<int, set<string> >, map<int, set<int> >&);
+       vector<float> getValues(vector< vector<SharedRAbundVector*> >&);
+       vector<float> getValues(vector< vector<SharedRAbundFloatVector*> >&);
+       map<int, float> getLengthToLeaf(Tree*&);
+       
+};
+
+
+#endif
+
index 4bb45b7f83d9c811a018f5c475fc557a9f65287f..9369a12280b35cd41a6a6e3548258f27ebb36b5c 100644 (file)
@@ -55,7 +55,6 @@ ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numS
                        f >> inputData;
                        set(i, inputData);
                }
-               
                m->gobble(f);
        }
        catch(exception& e) {
index 583d4ad2298171fb5045ecc3fed59a520934a25b..1b8e0462007179c8dfd09966153ed4ebb54a21a3 100644 (file)
--- a/makefile
+++ b/makefile
@@ -66,7 +66,7 @@ ifeq  ($(strip $(USEMPI)),yes)
 endif
 
 # if you want to enable reading and writing of compressed files, set to yes.
-# The default is no.  this may only work on unix-like systems.
+# The default is no.  this may only work on unix-like systems, not for windows.
 
 USECOMPRESSION ?= no
 
index 81ffb00347de5f51795cf74e6c605e664cc80f99..a6424f5fa95086083d4c09658f7c9e2338d2381c 100644 (file)
@@ -186,7 +186,7 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
        
        int *nr, *nc, *ldtabl, *work;
        int nrow=2, ncol=2, ldtable=2;
-       int workspace = 2*(row*col*sizeof(double *)); 
+       int workspace = 6*(row*col*sizeof(double *)); 
        double *expect, *prc, *emin,*prt,*pre;
        double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
          
diff --git a/mothur b/mothur
index b122ea07aece410563672cbc0d00719fa614c608..bb3b522fd6de1b23408e4a593490bca2db0337fc 100755 (executable)
Binary files a/mothur and b/mothur differ
index 337e5b33a6b1d8100effe484c230d42c7fd93f58..2e2299ec2ad6fa1938a2f49e1f8601993b232cbd 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -56,6 +56,7 @@
        #include <sys/wait.h>
        #include <sys/time.h>
        #include <sys/resource.h>
+       #include <sys/stat.h>
        #include <unistd.h>
        
        #ifdef USE_READLINE
index b1d818850bd974ff482bc8c88ffc4bdc0a3dc7bd..b30ee3ca630c91fea01343c9d873e709e027a4fb 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "mothurout.h"
 
+
 /******************************************************/
 MothurOut* MothurOut::getInstance() {
        if( _uniqueInstance == 0) {
@@ -359,18 +360,21 @@ string MothurOut::getline(ifstream& fileHandle) {
 }
 /***********************************************************************/
 
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 #ifdef USE_COMPRESSION
 inline bool endsWith(string s, const char * suffix){
   size_t suffixLength = strlen(suffix);
   return s.size() >= suffixLength && s.substr(s.size() - suffixLength, suffixLength).compare(suffix) == 0;
 }
 #endif
+#endif
 
 string MothurOut::getRootName(string longName){
        try {
        
                string rootName = longName;
 
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 #ifdef USE_COMPRESSION
     if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
       int pos = rootName.find_last_of('.');
@@ -378,7 +382,7 @@ string MothurOut::getRootName(string longName){
       cerr << "shortening " << longName << " to " << rootName << "\n";
     }
 #endif
-
+#endif
                if(rootName.find_last_of(".") != rootName.npos){
                        int pos = rootName.find_last_of('.')+1;
                        rootName = rootName.substr(0, pos);
@@ -632,7 +636,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
        try {
                        //get full path name
                        string completeFileName = getFullPathName(fileName);
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 #ifdef USE_COMPRESSION
       // check for gzipped or bzipped file
       if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
@@ -655,7 +659,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
         }
       }
 #endif
-
+#endif
                        fileHandle.open(completeFileName.c_str());
                        if(!fileHandle) {
                                //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
@@ -678,7 +682,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
 
                //get full path name
                string completeFileName = getFullPathName(fileName);
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 #ifdef USE_COMPRESSION
   // check for gzipped or bzipped file
   if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
@@ -701,7 +705,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
     }
   }
 #endif
-
+#endif
 
                fileHandle.open(completeFileName.c_str());
                if(!fileHandle) {
@@ -756,7 +760,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
        try { 
        
                string completeFileName = getFullPathName(fileName);
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 #ifdef USE_COMPRESSION
     // check for gzipped file
     if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
@@ -776,7 +780,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
       }
     }
 #endif
-
+#endif
                fileHandle.open(completeFileName.c_str(), ios::trunc);
                if(!fileHandle) {
                        mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
index 1ea9c39af69e703f41b0aa7c9e5194eeeb0efcf0..1a3de32088d3bf1351b1005c654ac612fcd27732 100644 (file)
@@ -181,7 +181,11 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        else if (taxfile == "not found") {  taxfile = ""; m->mothurOut("The taxonomy parameter is required for the get.lineage command."); m->mothurOutEndLine();  abort = true; }
                        
                        string usedDups = "true";
-                       string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "false"; usedDups = ""; }
+                       string temp = validParameter.validFile(parameters, "dups", false);      
+                       if (temp == "not found") { 
+                               if (namefile != "") {  temp = "true";                                   }
+                               else                            {  temp = "false"; usedDups = "";       }
+                       }
                        dups = m->isTrue(temp);
                        
                        taxons = validParameter.validFile(parameters, "taxon", false);  
index f8bb76fd664383f148c73eca62aa5cc2291202c8..2a22cf3b952ff217c9e57c5ce98bbb01c5cbacb0 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -9,6 +9,22 @@
 
 #include "tree.h"
 
+/*****************************************************************/
+Tree::Tree(int num) {
+       try {
+               globaldata = GlobalData::getInstance();
+               m = MothurOut::getInstance();
+               
+               numLeaves = num;  
+               numNodes = 2*numLeaves - 1;
+               
+               tree.resize(numNodes);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "Tree - numNodes");
+               exit(1);
+       }
+}
 /*****************************************************************/
 Tree::Tree(string g) {
        try {
@@ -129,7 +145,7 @@ void Tree::addNamesToCounts() {
                                                itCounts = tree[i].pGroups.find(group);
                                                if (itCounts == tree[i].pGroups.end()) { //new group, add it
                                                        tree[i].pGroups[group] = 1;
-                                               }else {
+                                               }else{
                                                        tree[i].pGroups[group]++;
                                                }
                                                
@@ -243,7 +259,180 @@ int Tree::assembleTree(string n) {
                exit(1);
        }
 }
+/*****************************************************************/
+void Tree::getSubTree(Tree* copy, vector<string> Groups) {
+       try {
+                       
+               //we want to select some of the leaf nodes to create the output tree
+               //go through the input Tree starting at parents of leaves
+               for (int i = 0; i < numNodes; i++) {
+                       
+                       //initialize leaf nodes
+                       if (i <= (numLeaves-1)) {
+                               tree[i].setName(Groups[i]);
+                                       
+                               //save group info
+                               string group = globaldata->gTreemap->getGroup(Groups[i]);
+                               vector<string> tempGroups; tempGroups.push_back(group);
+                               tree[i].setGroup(tempGroups);
+                               groupNodeInfo[group].push_back(i); 
+                               
+                               //set pcount and pGroup for groupname to 1.
+                               tree[i].pcount[group] = 1;
+                               tree[i].pGroups[group] = 1;
+                               
+                               //Treemap knows name, group and index to speed up search
+                               globaldata->gTreemap->setIndex(Groups[i], i);
+                               
+                               //intialize non leaf nodes
+                       }else if (i > (numLeaves-1)) {
+                               tree[i].setName("");
+                               vector<string> tempGroups;
+                               tree[i].setGroup(tempGroups);
+                       }
+               }
+               
+               set<int> removedLeaves;
+               for (int i = 0; i < copy->getNumLeaves(); i++) {
+                       
+                       if (removedLeaves.count(i) == 0) {
+                       
+                       //am I in the group
+                       int parent = copy->tree[i].getParent();
+                       
+                       if (parent != -1) {
+                               if (m->inUsersGroups(copy->tree[i].getName(), Groups)) {
+                                       //find my siblings name
+                                       int parentRC = copy->tree[parent].getRChild();
+                                       int parentLC = copy->tree[parent].getLChild();
+                                       
+                                       //if I am the right child, then my sib is the left child
+                                       int sibIndex = parentRC;
+                                       if (parentRC == i) { sibIndex = parentLC; }
+                                       
+                                       string sibsName = copy->tree[sibIndex].getName();
+                                       
+                                       //if yes, is my sibling
+                                       if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) {
+                                               //we both are okay no trimming required
+                                       }else{
+                                               //i am, my sib is not, so remove sib by setting my parent to my grandparent
+                                               int grandparent = copy->tree[parent].getParent();
+                                               int grandparentLC = copy->tree[grandparent].getLChild();
+                                               int grandparentRC = copy->tree[grandparent].getRChild();
+                                               
+                                               //whichever of my granparents children was my parent now equals me
+                                               if (grandparentLC == parent) { grandparentLC = i; }
+                                               else { grandparentRC = i; }
+                                               
+                                               copy->tree[i].setParent(grandparent);
+                                               copy->tree[i].setBranchLength((copy->tree[i].getBranchLength()+copy->tree[parent].getBranchLength()));
+                                               copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
+                                               removedLeaves.insert(sibIndex);
+                                       }
+                               }else{
+                                       //find my siblings name
+                                       int parentRC = copy->tree[parent].getRChild();
+                                       int parentLC = copy->tree[parent].getLChild();
+                                       
+                                       //if I am the right child, then my sib is the left child
+                                       int sibIndex = parentRC;
+                                       if (parentRC == i) { sibIndex = parentLC; }
+                                       
+                                       string sibsName = copy->tree[sibIndex].getName();
+                                       
+                                       //if no is my sibling
+                                       if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) {
+                                               //i am not, but my sib is
+                                               int grandparent = copy->tree[parent].getParent();
+                                               int grandparentLC = copy->tree[grandparent].getLChild();
+                                               int grandparentRC = copy->tree[grandparent].getRChild();
+                                               
+                                               //whichever of my granparents children was my parent now equals my sib
+                                               if (grandparentLC == parent) { grandparentLC = sibIndex; }
+                                               else { grandparentRC = sibIndex; }
+                                               
+                                               copy->tree[sibIndex].setParent(grandparent);
+                                               copy->tree[sibIndex].setBranchLength((copy->tree[sibIndex].getBranchLength()+copy->tree[parent].getBranchLength()));
+                                               copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
+                                               removedLeaves.insert(i);
+                                       }else{
+                                               //neither of us are, so we want to eliminate ourselves and our parent
+                                               //so set our parents sib to our great-grandparent
+                                               int parent = copy->tree[i].getParent();
+                                               int grandparent = copy->tree[parent].getParent();
+                                               
+                                               if (grandparent != -1) {
+                                                       int greatgrandparent = copy->tree[grandparent].getParent();
+                                                       int greatgrandparentLC = copy->tree[greatgrandparent].getLChild();
+                                                       int greatgrandparentRC = copy->tree[greatgrandparent].getRChild();
+                                                       
+                                                       int grandparentLC = copy->tree[grandparent].getLChild();
+                                                       int grandparentRC = copy->tree[grandparent].getRChild();
+                                                       
+                                                       int parentsSibIndex = grandparentLC;
+                                                       if (grandparentRC == parent) { parentsSibIndex = grandparentLC; }
 
+                                                       //whichever of my greatgrandparents children was my grandparent
+                                                       if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; }
+                                                       else { greatgrandparentRC = parentsSibIndex; }
+                                                       
+                                                       copy->tree[parentsSibIndex].setParent(greatgrandparent);
+                                                       copy->tree[parentsSibIndex].setBranchLength((copy->tree[parentsSibIndex].getBranchLength()+copy->tree[grandparent].getBranchLength()));
+                                                       copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC);
+                                               }else{
+                                                       copy->tree[parent].setChildren(-1, -1);
+                                                       cout << "issues with making subtree" << endl;
+                                               }
+                                               removedLeaves.insert(sibIndex);
+                                               removedLeaves.insert(i);
+                                       }
+                               }
+                       }
+                       }
+               }
+               
+               int root = 0;
+               for (int i = 0; i < copy->getNumNodes(); i++) {
+                       //you found the root
+                       if (copy->tree[i].getParent() == -1) { root = i; break; }
+               }
+               
+               int nextSpot = numLeaves;
+               populateNewTree(copy->tree, root, nextSpot);
+               
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "getCopy");
+               exit(1);
+       }
+}
+/*****************************************************************/
+int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
+       try {
+               
+               if (oldtree[node].getLChild() != -1) {
+                       int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index);
+                       int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index);
+                       
+                       tree[index].setChildren(lc, rc);
+                       index++;
+                       
+                       return (index-1);
+               }else { //you are a leaf
+                       int indexInNewTree = globaldata->gTreemap->getIndex(oldtree[node].getName());
+                       
+                       tree[indexInNewTree].setParent(index);
+                       return indexInNewTree;
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "populateNewTree");
+               exit(1);
+       }
+}
 /*****************************************************************/
 void Tree::getCopy(Tree* copy) {
        try {
@@ -592,18 +781,17 @@ void Tree::print(ostream& out) {
        }
 }
 /*****************************************************************/
-void Tree::printForBoot(ostream& out) {
+void Tree::print(ostream& out, string mode) {
        try {
                int root = findRoot();
-               printBranch(root, out, "boot");
+               printBranch(root, out, mode);
                out << ";" << endl;
        }
        catch(exception& e) {
-               m->errorOut(e, "Tree", "printForBoot");
+               m->errorOut(e, "Tree", "print");
                exit(1);
        }
 }
-
 /*****************************************************************/
 // This prints out the tree in Newick form.
 void Tree::createNewickFile(string f) {
@@ -644,12 +832,11 @@ int Tree::findRoot() {
                exit(1);
        }
 }
-
 /*****************************************************************/
 void Tree::printBranch(int node, ostream& out, string mode) {
-       try {
-               
-               // you are not a leaf
+try {
+
+// you are not a leaf
                if (tree[node].getLChild() != -1) {
                        out << "(";
                        printBranch(tree[node].getLChild(), out, mode);
@@ -666,21 +853,39 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                                if (tree[node].getLabel() != -1) {
                                        out << tree[node].getLabel();
                                }
+                       }else if (mode == "both") {
+                               if (tree[node].getLabel() != -1) {
+                                       out << tree[node].getLabel();
+                               }
+                               //if there is a branch length then print it
+                               if (tree[node].getBranchLength() != -1) {
+                                       out << ":" << tree[node].getBranchLength();
+                               }
                        }
                }else { //you are a leaf
                        string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
                        
-                       out << leafGroup; 
                        if (mode == "branch") {
+                               out << leafGroup; 
                                //if there is a branch length then print it
                                if (tree[node].getBranchLength() != -1) {
                                        out << ":" << tree[node].getBranchLength();
                                }
                        }else if (mode == "boot") {
+                               out << leafGroup; 
                                //if there is a label then print it
                                if (tree[node].getLabel() != -1) {
                                        out << tree[node].getLabel();
                                }
+                       }else if (mode == "both") {
+                               out << tree[node].getName();
+                               if (tree[node].getLabel() != -1) {
+                                       out << tree[node].getLabel();
+                               }
+                               //if there is a branch length then print it
+                               if (tree[node].getBranchLength() != -1) {
+                                       out << ":" << tree[node].getBranchLength();
+                               }
                        }
                }
                
@@ -690,7 +895,7 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                exit(1);
        }
 }
-
+                                                                         
 /*****************************************************************/
 
 void Tree::printTree() {
diff --git a/tree.h b/tree.h
index 1c44d8c919805c24072e0f6ab314789393ed63ce..102d56546c24d427616b0388fcf80b869312b694 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -19,10 +19,12 @@ class GlobalData;
 class Tree {
 public: 
        Tree(string); 
+       Tree(int); 
        Tree();         //to generate a tree from a file
        ~Tree();
        
        void getCopy(Tree*);  //makes tree a copy of the one passed in.
+       void getSubTree(Tree*, vector<string>);  //makes tree a that contains only the names passed in.
        void assembleRandomTree();
        void assembleRandomUnifracTree(vector<string>);
        void assembleRandomUnifracTree(string, string);
@@ -34,7 +36,7 @@ public:
        map<string, int> mergeUserGroups(int, vector<string>);  //returns a map with a groupname and the number of times that group was seen in the children
        void printTree();
        void print(ostream&);
-       void printForBoot(ostream&);
+       void print(ostream&, string);
        int findRoot();  //return index of root node
        
        //this function takes the leaf info and populates the non leaf nodes
@@ -65,6 +67,7 @@ private:
                                                        //not included in the tree. 
                                                        //only takes names from the first tree in the tree file and assumes that all trees use the same names.
        int readTreeString(ifstream&);
+       int populateNewTree(vector<Node>&, int, int&);
                
        MothurOut* m;