]> git.donarmstrong.com Git - mothur.git/commitdiff
added phylo.diversity command. added hard parameter to cluster, hcluster and read...
authorwestcott <westcott>
Tue, 4 May 2010 12:41:07 +0000 (12:41 +0000)
committerwestcott <westcott>
Tue, 4 May 2010 12:41:07 +0000 (12:41 +0000)
16 files changed:
Mothur.xcodeproj/project.pbxproj
clustercommand.cpp
clustercommand.h
commandfactory.cpp
hclustercommand.cpp
hclustercommand.h
makefile
phylodiversity.cpp [new file with mode: 0644]
phylodiversity.h [new file with mode: 0644]
phylodiversitycommand.cpp [new file with mode: 0644]
phylodiversitycommand.h [new file with mode: 0644]
readdistcommand.cpp
readdistcommand.h
readtreecommand.cpp
removeseqscommand.cpp
tree.cpp

index ce6918c8449ed7b857386c30c75a5537e74767cd..2084dc5fffb240afa30d8f790448006fc0b0d814 100644 (file)
@@ -7,6 +7,10 @@
        objects = {
 
 /* Begin PBXFileReference section */
+               A72B3A62118B37FD004B9F8D /* phylodiversitycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversitycommand.h; sourceTree = "<group>"; };
+               A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = "<group>"; };
+               A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = "<group>"; };
+               A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
                A747E79B1163442A00FB9042 /* chimeracheckcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckcommand.h; sourceTree = "<group>"; };
                A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckcommand.cpp; sourceTree = "<group>"; };
                A747E81C116365E000FB9042 /* chimeraslayercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayercommand.h; sourceTree = "<group>"; };
                                A7DA20B0113FECD400BF472F /* nseqs.h */,
                                A7DA20B2113FECD400BF472F /* onegapdist.h */,
                                A7DA20B3113FECD400BF472F /* onegapignore.h */,
+                               A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */,
+                               A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */,
                                A7DA20BE113FECD400BF472F /* parsimony.cpp */,
                                A7DA20BF113FECD400BF472F /* parsimony.h */,
                                A7DA20CE113FECD400BF472F /* qstat.cpp */,
                                A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */,
                                A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */,
                                A7DA20C1113FECD400BF472F /* parsimonycommand.h */,
-                               A7DA20C2113FECD400BF472F /* pcacommand.cpp */,
                                A7DA20C3113FECD400BF472F /* pcacommand.h */,
-                               A7DA20C6113FECD400BF472F /* phylotypecommand.cpp */,
+                               A7DA20C2113FECD400BF472F /* pcacommand.cpp */,
+                               A72B3A62118B37FD004B9F8D /* phylodiversitycommand.h */,
+                               A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */,
                                A7DA20C7113FECD400BF472F /* phylotypecommand.h */,
-                               A7DA20CA113FECD400BF472F /* preclustercommand.cpp */,
+                               A7DA20C6113FECD400BF472F /* phylotypecommand.cpp */,
                                A7DA20CB113FECD400BF472F /* preclustercommand.h */,
-                               A7DA20D0113FECD400BF472F /* quitcommand.cpp */,
+                               A7DA20CA113FECD400BF472F /* preclustercommand.cpp */,
                                A7DA20D1113FECD400BF472F /* quitcommand.h */,
-                               A7DA20DA113FECD400BF472F /* rarefactcommand.cpp */,
+                               A7DA20D0113FECD400BF472F /* quitcommand.cpp */,
                                A7DA20DB113FECD400BF472F /* rarefactcommand.h */,
-                               A7DA20DD113FECD400BF472F /* rarefactsharedcommand.cpp */,
+                               A7DA20DA113FECD400BF472F /* rarefactcommand.cpp */,
                                A7DA20DE113FECD400BF472F /* rarefactsharedcommand.h */,
-                               A7DA20E5113FECD400BF472F /* readdistcommand.cpp */,
+                               A7DA20DD113FECD400BF472F /* rarefactsharedcommand.cpp */,
                                A7DA20E6113FECD400BF472F /* readdistcommand.h */,
-                               A7DA20EA113FECD400BF472F /* readotucommand.cpp */,
+                               A7DA20E5113FECD400BF472F /* readdistcommand.cpp */,
                                A7DA20EB113FECD400BF472F /* readotucommand.h */,
-                               A7DA20F0113FECD400BF472F /* readtreecommand.cpp */,
+                               A7DA20EA113FECD400BF472F /* readotucommand.cpp */,
                                A7DA20F1113FECD400BF472F /* readtreecommand.h */,
-                               A7DA20F2113FECD400BF472F /* removeseqscommand.cpp */,
+                               A7DA20F0113FECD400BF472F /* readtreecommand.cpp */,
                                A7DA20F3113FECD400BF472F /* removeseqscommand.h */,
+                               A7DA20F2113FECD400BF472F /* removeseqscommand.cpp */,
                                A7DA20F5113FECD400BF472F /* reversecommand.h */,
                                A7DA20F4113FECD400BF472F /* reversecommand.cpp */,
                                A7DA20F9113FECD400BF472F /* screenseqscommand.h */,
index 6f6e309331df3caf27dc5d2d0c6d4e19ab246590..9ed67e04433edecd93262c1824732edbb3cd186c 100644 (file)
@@ -22,7 +22,7 @@ ClusterCommand::ClusterCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"cutoff","precision","method","showabund","timing","outputdir","inputdir"};
+                       string Array[] =  {"cutoff","precision","method","showabund","timing","hard","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -56,10 +56,13 @@ ClusterCommand::ClusterCommand(string option)  {
                        length = temp.length();
                        convert(temp, precision); 
                        
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
+                       
                        temp = validParameter.validFile(parameters, "cutoff", false);
                        if (temp == "not found") { temp = "10"; }
                        convert(temp, cutoff); 
-                       cutoff += (5 / (precision * 10.0));
+                       if (!hard) {    cutoff += (5 / (precision * 10.0));  }
                        
                        method = validParameter.validFile(parameters, "method", false);
                        if (method == "not found") { method = "furthest"; }
@@ -114,7 +117,7 @@ ClusterCommand::ClusterCommand(string option)  {
 void ClusterCommand::help(){
        try {
                m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
-               m->mothurOut("The cluster command parameter options are method, cuttoff, precision, showabund and timing. No parameters are required.\n");
+               m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n");
                m->mothurOut("The cluster command should be in the following format: \n");
                m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
                m->mothurOut("The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");      
index 5c57b69d1bd5dc0495925cb452df908ff5cb5b81..c8e01749de20baac9fe523e9cbf0b5f158b35941 100644 (file)
@@ -43,7 +43,7 @@ private:
        RAbundVector oldRAbund;
        ListVector oldList;
 
-       bool abort;
+       bool abort, hard;
 
        string method, fileroot, tag, outputDir;
        double cutoff;
index 32d42113d60a997b2404053bf0de39c68a28c71a..30b76f813f6710ff9c2cda0819305ca8837f7bf5 100644 (file)
@@ -72,6 +72,7 @@
 #include "chimerapintailcommand.h"
 #include "chimerabellerophoncommand.h"
 #include "setlogfilecommand.h"
+#include "phylodiversitycommand.h"
 
 /*******************************************************/
 
@@ -151,6 +152,7 @@ CommandFactory::CommandFactory(){
        commands["parse.list"]                  = "parse.list";
        commands["parse.sff"]                   = "parse.sff";
        commands["set.logfile"]                 = "set.logfile";
+       commands["phylo.diversity"]             = "phylo.diversity";
        commands["classify.seqs"]               = "MPIEnabled"; 
        commands["dist.seqs"]                   = "MPIEnabled";
        commands["filter.seqs"]                 = "MPIEnabled";
@@ -267,6 +269,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "set.logfile")                   {       command = new SetLogFileCommand(optionString);                          }
                else if(commandName == "parse.list")                    {       command = new ParseListCommand(optionString);                           }
                else if(commandName == "parse.sff")                             {       command = new ParseSFFCommand(optionString);                            }
+               else if(commandName == "phylo.diversity")               {       command = new PhyloDiversityCommand(optionString);                      }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
index f98190873eb7219b86f3da445de3fbf9da8ff848..89c6dbd4d27d61ade30c0668a2376281298bd37d 100644 (file)
@@ -21,7 +21,7 @@ HClusterCommand::HClusterCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"cutoff","precision","method","phylip","column","name","sorted","showabund","timing","outputdir","inputdir"};
+                       string Array[] =  {"cutoff","hard","precision","method","phylip","column","name","sorted","showabund","timing","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -104,10 +104,13 @@ HClusterCommand::HClusterCommand(string option)  {
                        length = temp.length();
                        convert(temp, precision); 
                        
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
+                       
                        temp = validParameter.validFile(parameters, "cutoff", false);
                        if (temp == "not found") { temp = "10"; }
                        convert(temp, cutoff); 
-                       cutoff += (5 / (precision * 10.0));
+                       if (!hard) {  cutoff += (5 / (precision * 10.0));  }
                        
                        method = validParameter.validFile(parameters, "method", false);
                        if (method == "not found") { method = "furthest"; }
index 8bbc655fd7c30a9708b8292a81de0bb6a7b6c478..1aae49d9b341a6ce2652ed4bd3e340bf2a0a9040 100644 (file)
@@ -46,7 +46,7 @@ private:
        ListVector oldList;
        ReadCluster* read;
        
-       bool abort, sorted, print_start;
+       bool abort, sorted, print_start, hard;
        string method, fileroot, tag, distfile, format, phylipfile, columnfile, namefile, sort, showabund, timing, outputDir;
        double cutoff;
        int precision, length;
index b7bb8d04798f870be5f84d925323b9a79be08ca4..a868ec524c0366e2e942c4a57379b4248a619569 100644 (file)
--- a/makefile
+++ b/makefile
@@ -219,7 +219,9 @@ mothur : \
                ./classifyseqscommand.o\\r
                ./parsesffcommand.o\\r
                ./classify.o\\r
-               ./phylotree.o\\r
+               ./phylotree.o\
+               ./phylodiversity.o\
+               ./phylodiversitycommand.o\\r
                ./bayesian.o\
                ./phylosummary.o\\r
                ./alignmentdb.o\\r
@@ -418,7 +420,9 @@ mothur : \
                ./classifyseqscommand.o\\r
                ./parsesffcommand.o\\r
                ./classify.o\\r
-               ./phylotree.o\\r
+               ./phylotree.o\
+               ./phylodiversity.o\
+               ./phylodiversitycommand.o\\r
                ./bayesian.o\
                ./phylosummary.o\\r
                ./alignmentdb.o\\r
@@ -620,7 +624,9 @@ clean :
                ./classifyseqscommand.o\\r
                ./parsesffcommand.o\\r
                ./classify.o\\r
-               ./phylotree.o\\r
+               ./phylotree.o\
+               ./phylodiversity.o\
+               ./phylodiversitycommand.o\\r
                ./bayesian.o\
                ./phylosummary.o\\r
                ./alignmentdb.o\\r
@@ -1635,6 +1641,14 @@ install : mothur
 ./setlogfilecommand.o : setlogfilecommand.cpp\r
        $(CC) $(CC_OPTIONS) setlogfilecommand.cpp -c $(INCLUDE) -o ./setlogfilecommand.o\r
 \r
+# Item # 199 -- phylodiversity --\r
+./phylodiversity.o : phylodiversity.cpp\r
+       $(CC) $(CC_OPTIONS) phylodiversity.cpp -c $(INCLUDE) -o ./phylodiversity.o\r
+\r
+# Item # 200 -- phylodiversitycommand --\r
+./phylodiversitycommand.o : phylodiversitycommand.cpp\r
+       $(CC) $(CC_OPTIONS) phylodiversitycommand.cpp -c $(INCLUDE) -o ./phylodiversitycommand.o\r
+\r
 \r
 \r
 ##### END RUN ####\r
diff --git a/phylodiversity.cpp b/phylodiversity.cpp
new file mode 100644 (file)
index 0000000..96d49d4
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ *  phylodiversity.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 4/30/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "phylodiversity.h"
+
+/**************************************************************************************************/
+EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes) {
+    try {
+               
+               map<string, float> DScore;
+               float totalLength = 0.0;
+               data.clear();
+               
+               //initialize Dscore
+               for (int i=0; i<globaldata->Groups.size(); i++) {               DScore[globaldata->Groups[i]] = 0.0;    }
+       
+               /********************************************************/
+               //calculate a D value for each group 
+               for(int v=0;v<treeNodes.size();v++){
+                               
+                       if (m->control_pressed) { return data; }
+                       
+                       //is this node from a sequence which is in one of the users groups
+                       if (inUsersGroups(t->tree[treeNodes[v]].getGroup(), globaldata->Groups) == true) {
+                                       
+                               //calc the branch length
+                               //while you aren't at root
+                               float sum = 0.0;
+                               int index = treeNodes[v];
+
+                               while(t->tree[index].getParent() != -1){
+                                       
+                                       //if you have a BL
+                                       if(t->tree[index].getBranchLength() != -1){
+                                               sum += abs(t->tree[index].getBranchLength());
+                                       }
+                                       index = t->tree[index].getParent();
+                               }
+                                       
+                               //get last breanch length added
+                               if(t->tree[index].getBranchLength() != -1){
+                                       sum += abs(t->tree[index].getBranchLength());
+                               }
+                                       
+                               //for each group in the groups update the total branch length accounting for the names file
+                               vector<string> groups = t->tree[treeNodes[v]].getGroup();
+                               for (int j = 0; j < groups.size(); j++) {
+                                       int numSeqsInGroupJ = 0;
+                                       map<string, int>::iterator it;
+                                       it = t->tree[treeNodes[v]].pcount.find(groups[j]);
+                                       if (it != t->tree[treeNodes[v]].pcount.end()) { //this leaf node contains seqs from group j
+                                               numSeqsInGroupJ = it->second;
+                                       }
+
+                                       //add branch length to total for group
+                                       DScore[groups[j]] += (numSeqsInGroupJ * sum);
+                               }
+                       }
+               }
+               
+       
+               for (int i=0; i<globaldata->Groups.size(); i++) {   
+                       if (groupTotals[globaldata->Groups[i]] != 0.0) { //avoid divide by zero error
+                               float percent = DScore[globaldata->Groups[i]] / groupTotals[globaldata->Groups[i]];
+                               data.push_back(percent);  
+                       }else {  data.push_back(0.0);  }
+               }
+               
+               return data;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversity", "getValues");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+void PhyloDiversity::setTotalGroupBranchLengths(Tree* t) {
+    try {
+               
+               groupTotals.clear();
+               
+               //initialize group totals
+               for (int i=0; i<globaldata->Groups.size(); i++) {               groupTotals[globaldata->Groups[i]] = 0.0;       }
+               
+               
+               /********************************************************/
+               //calculate a D value for each group 
+               for(int v=0;v<t->getNumLeaves();v++){
+                       
+                       //is this node from a sequence which is in one of the users groups
+                       if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
+                       
+                               //calc the branch length
+                               int index = v;
+                               float sum = 0.0;
+                               
+                               while(t->tree[index].getParent() != -1){  //while you aren't at root
+                                               
+                                       //if you have a BL
+                                       if(t->tree[index].getBranchLength() != -1){
+                                               sum += abs(t->tree[index].getBranchLength());
+                                       }
+                                       index = t->tree[index].getParent();
+                               }
+                                       
+                               //get last breanch length added
+                               if(t->tree[index].getBranchLength() != -1){
+                                       sum += abs(t->tree[index].getBranchLength());
+                               }
+                                                       
+                               //account for the names file
+                               vector<string> groups = t->tree[v].getGroup();
+                               for (int j = 0; j < groups.size(); j++) {
+                                       int numSeqsInGroupJ = 0;
+                                       map<string, int>::iterator it;
+                                       it = t->tree[v].pcount.find(groups[j]);
+                                       if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group j
+                                               numSeqsInGroupJ = it->second;
+                                       }
+
+                                       //add branch length to total for group
+                                       groupTotals[groups[j]] += (numSeqsInGroupJ * sum);
+                               }//end for
+                       }//end if
+               }//end for
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversity", "setTotalGroupBranchLengths");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+
diff --git a/phylodiversity.h b/phylodiversity.h
new file mode 100644 (file)
index 0000000..131a63c
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef PHYLODIVERSITY_H
+#define PHYLODIVERSITY_H
+
+
+/*
+ *  phylodiversity.h
+ *  Mothur
+ *
+ *  Created by westcott on 4/30/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "treemap.h"
+#include "globaldata.hpp"
+#include "mothurout.h"
+
+typedef vector<double> EstOutput; 
+
+/***********************************************************************/
+
+class PhyloDiversity  {
+       
+       public:
+               PhyloDiversity(TreeMap* t) : tmap(t) { globaldata = GlobalData::getInstance();  m = MothurOut::getInstance(); }
+               ~PhyloDiversity() {};
+               
+               EstOutput getValues(Tree*, vector<int>);
+               void setTotalGroupBranchLengths(Tree*);
+               
+       private:
+               GlobalData* globaldata;
+               MothurOut* m;
+               EstOutput data;
+               TreeMap* tmap;
+               map<string, float> groupTotals;
+};
+
+/***********************************************************************/
+
+
+#endif
+
diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp
new file mode 100644 (file)
index 0000000..978b188
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ *  phylodiversitycommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 4/30/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "phylodiversitycommand.h"
+#include "phylodiversity.h"
+
+//**********************************************************************************************************************
+PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
+       try {
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"freq","rarefy","iters","groups","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       if (globaldata->gTree.size() == 0) {//no trees were read
+                               m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true;  }
+
+                       string temp;
+                       temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
+                       convert(temp, freq); 
+                       
+                       temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
+                       convert(temp, iters); 
+                       
+                       temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
+                       rarefy = isTrue(temp);
+                       if (!rarefy) { iters = 1;  }
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups;  globaldata->Groups = Groups;  }
+                       else { 
+                               splitAtDash(groups, Groups);
+                               globaldata->Groups = Groups;
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
+               exit(1);
+       }                       
+}
+//**********************************************************************************************************************
+
+void PhyloDiversityCommand::help(){
+       try {
+
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversityCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+PhyloDiversityCommand::~PhyloDiversityCommand(){}
+
+//**********************************************************************************************************************
+
+int PhyloDiversityCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
+               for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i);  break; }  }
+                
+               vector<string> outputNames;
+               
+               //diversity calculator
+               PhyloDiversity phylo(globaldata->gTreemap);
+               
+               vector<Tree*> trees = globaldata->gTree;
+               
+               //for each of the users trees
+               for(int i = 0; i < trees.size(); i++) {
+               
+                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
+                       
+                       phylo.setTotalGroupBranchLengths(trees[i]);
+                       
+                       string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylo.diversity";
+                       if (rarefy) { outFile += ".rarefaction"; }
+                       outputNames.push_back(outFile);
+                       
+                       int numLeafNodes = trees[i]->getNumLeaves();
+                       
+                       //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
+                       vector<int> randomLeaf;
+                       for (int j = 0; j < numLeafNodes; j++) {  
+                               if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
+                                       randomLeaf.push_back(j); 
+                               }
+                       }
+                       
+                       numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
+                       
+                       //each group, each sampling, if no rarefy iters = 1;
+                       vector< vector<float> > diversity;
+                       diversity.resize(globaldata->Groups.size());
+                       
+                       //initialize sampling spots
+                       vector<int> numSampledList;
+                       for(int k = 0; k < numLeafNodes; k++){  if((k == 0) || (k+1) % freq == 0){  numSampledList.push_back(k); }   }
+                       if(numLeafNodes % freq != 0){   numSampledList.push_back(numLeafNodes);   }
+                       
+                       //initialize diversity
+                       for (int j = 0; j < diversity.size(); j++) {   diversity[j].resize(numSampledList.size(), 0.0);  }  //                  10sampled       20 sampled ...
+                                                                                                                                                                                                                               //groupA                0.0                     0.0
+                                                                                                                                                                                                                       //then for each iter you add to score and then when printing divide by iters to get average
+                       for (int l = 0; l < iters; l++) {
+                               random_shuffle(randomLeaf.begin(), randomLeaf.end());
+               
+                               vector<int> leavesSampled;
+                               EstOutput data;
+                               int count = 0;
+                               for(int k = 0; k < numLeafNodes; k++){
+                                               
+                                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
+                                       
+                                       leavesSampled.push_back(randomLeaf[k]);
+                                               
+                                       if((k == 0) || (k+1) % freq == 0){ //ready to calc?
+                                               
+                                               data = phylo.getValues(trees[i], leavesSampled);
+                                               
+                                               //datas results are in the same order as globaldatas groups
+                                               for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
+                                               
+                                               count++;
+                                       }
+                               }
+               
+                               if(numLeafNodes % freq != 0){   
+                                       
+                                       data = phylo.getValues(trees[i], leavesSampled);
+                                       
+                                       //datas results are in the same order as globaldatas groups
+                                       for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
+                               }
+                       }
+                       
+                       printData(numSampledList, diversity, outFile);
+
+               }
+               
+       
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversityCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
+       try {
+               ofstream out;
+               openOutputFile(file, out);
+               
+               out << "numSampled\t";
+               for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t';  }
+               out << endl;
+               
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               for (int i = 0; i < num.size(); i++) {  
+                       if (i == (num.size()-1)) {  out << num[i] << '\t';  }
+                       else {  out << (num[i]+1) << '\t';  }
+                       
+                       for (int j = 0; j < div.size(); j++) {
+                               float score = div[j][i] / (float)iters;
+                               out << setprecision(6) << score << '\t';
+                       }
+                       out << endl;
+               }
+               
+               out.close();
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversityCommand", "printData");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h
new file mode 100644 (file)
index 0000000..bd26173
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef PHYLODIVERSITYCOMMAND_H
+#define PHYLODIVERSITYCOMMAND_H
+
+/*
+ *  phylodiversitycommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 4/30/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "treemap.h"
+#include "globaldata.hpp"
+
+class PhyloDiversityCommand : public Command {
+       
+       public:
+               PhyloDiversityCommand(string);
+               ~PhyloDiversityCommand();
+               int execute();  
+               void help();
+       
+       private:
+               GlobalData* globaldata;
+       
+               int iters, freq;  
+               bool abort, rarefy;
+               string groups, outputDir;
+               vector<string> Groups, outputNames; //holds groups to be used, and outputFile names
+               
+               void printData(vector<int>&, vector< vector<float> >&, string);
+};
+
+#endif
+
index 7f60d712861f34a79b798e0ef8d1a3cbcbe714a5..414f33087738ba355ad5ff51c2e89349d41b21f9 100644 (file)
@@ -22,7 +22,7 @@ ReadDistCommand::ReadDistCommand(string option) {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
+                       string Array[] =  {"phylip", "column", "name", "cutoff","hard", "precision", "group","outputdir","inputdir","sim"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -126,9 +126,12 @@ ReadDistCommand::ReadDistCommand(string option) {
                        sim = isTrue(temp); 
                        globaldata->sim = sim;
                        
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
+                       
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
                        convert(temp, cutoff); 
-                       cutoff += (5 / (precision * 10.0));
+                       if (!hard) {  cutoff += (5 / (precision * 10.0));  }
                        
                        if (abort == false) {
                                distFileName = globaldata->inputFileName;
index 937ca3f0797c58bc24dc3a239ac5f7c0e4624339..6241f97b5fb1b70f79a326360ace611a1866fa72 100644 (file)
@@ -42,7 +42,7 @@ private:
        string phylipfile, columnfile, namefile, groupfile, outputDir;
        NameAssignment* nameMap;
 
-       bool abort, sim;
+       bool abort, sim, hard;
 
 };
 
index 60654489ce6a1a848eb0dc9743f43e64b832a639..1762983a148af80403eb66b68bd2834f703a84d8 100644 (file)
@@ -180,6 +180,8 @@ int ReadTreeCommand::execute(){
                                        }
                                }
                        }
+                       
+                       globaldata->gTreemap = treeMap;
                }
                
                return 0;
index 43ec945ad49e45ba933b866ac55995ed3e11e62d..a61e467e0bd5820060c052acffefe5fe7983294f 100644 (file)
@@ -125,13 +125,13 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport or list."); m->mothurOutEndLine(); abort = true; }
                        
-                       int okay = 2;
-                       if (outputDir != "") { okay++; }
-                       if (usedDups != "") { okay++;  }
+                       //int okay = 2;
+                       //if (outputDir != "") { okay++; }
+                       //if (usedDups != "") { okay++;  }
                        
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }
                        
-                       if (parameters.size() > okay) { m->mothurOut("You may only enter one of the following: fasta, name, group, alignreport, or list."); m->mothurOutEndLine(); abort = true;  }
+                       //if (parameters.size() > okay) { m->mothurOut("You may only enter one of the following: fasta, name, group, alignreport, or list."); m->mothurOutEndLine(); abort = true;  }
                }
 
        }
@@ -172,10 +172,10 @@ int RemoveSeqsCommand::execute(){
                
                //read through the correct file and output lines you want to keep
                if (fastafile != "")            {               readFasta();    }
-               else if (namefile != "")        {               readName();             }
-               else if (groupfile != "")       {               readGroup();    }
-               else if (alignfile != "")       {               readAlign();    }
-               else if (listfile != "")        {               readList();             }
+               if (namefile != "")                     {               readName();             }
+               if (groupfile != "")            {               readGroup();    }
+               if (alignfile != "")            {               readAlign();    }
+               if (listfile != "")                     {               readList();             }
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
                
@@ -221,7 +221,7 @@ int RemoveSeqsCommand::readFasta(){
                                        wroteSomething = true;
                                        
                                        currSeq.printSequence(out);
-                               }else {         names.erase(name);              }
+                               }//else {               names.erase(name);              }
                        }
                        gobble(in);
                }
@@ -435,7 +435,7 @@ int RemoveSeqsCommand::readGroup(){
                        if (names.count(name) == 0) {
                                wroteSomething = true;
                                out << name << '\t' << group << endl;
-                       }else {         names.erase(name);              }
+                       }//else {               names.erase(name);              }
                                        
                        gobble(in);
                }
@@ -496,7 +496,7 @@ int RemoveSeqsCommand::readAlign(){
                                out << endl;
                                
                        }else {//still read just don't do anything with it
-                               names.erase(name);      
+                               //names.erase(name);    
                                
                                //read rest
                                for (int i = 0; i < 15; i++) {  
index 7da3cef3718502640d61377bccbaae0d8e4213d3..b6cfa0b2429a0e7a6c94f4f5ef5c36ad7f04f473 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -70,10 +70,11 @@ void Tree::addNamesToCounts() {
                                
                //go through each leaf and update its pcounts and pgroups
                for (int i = 0; i < numLeaves; i++) {
+
                        string name = tree[i].getName();
-                       
+               
                        map<string, string>::iterator itNames = globaldata->names.find(name);
-                       
+               
                        if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1);  }
                        else {
                                vector<string> dupNames;
@@ -131,8 +132,7 @@ void Tree::addNamesToCounts() {
                                tree[i].setGroup(nodeGroups);
                                
                        }//end else
-               }//end for
-                               
+               }//end for                                      
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "addNamesToCounts");