From: westcott Date: Tue, 4 May 2010 12:41:07 +0000 (+0000) Subject: added phylo.diversity command. added hard parameter to cluster, hcluster and read... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=dabb32850eae34e26b066a7da55716ad685786a3 added phylo.diversity command. added hard parameter to cluster, hcluster and read.dist. modified remove.seqs so it can do multiple files at once. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ce6918c..2084dc5 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,6 +7,10 @@ objects = { /* Begin PBXFileReference section */ + A72B3A62118B37FD004B9F8D /* phylodiversitycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversitycommand.h; sourceTree = ""; }; + A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = ""; }; + A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = ""; }; + A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = ""; }; A747E79B1163442A00FB9042 /* chimeracheckcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckcommand.h; sourceTree = ""; }; A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckcommand.cpp; sourceTree = ""; }; A747E81C116365E000FB9042 /* chimeraslayercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayercommand.h; sourceTree = ""; }; @@ -552,6 +556,8 @@ A7DA20B0113FECD400BF472F /* nseqs.h */, A7DA20B2113FECD400BF472F /* onegapdist.h */, A7DA20B3113FECD400BF472F /* onegapignore.h */, + A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */, + A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */, A7DA20BE113FECD400BF472F /* parsimony.cpp */, A7DA20BF113FECD400BF472F /* parsimony.h */, A7DA20CE113FECD400BF472F /* qstat.cpp */, @@ -706,26 +712,28 @@ A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */, A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */, A7DA20C1113FECD400BF472F /* parsimonycommand.h */, - A7DA20C2113FECD400BF472F /* pcacommand.cpp */, A7DA20C3113FECD400BF472F /* pcacommand.h */, - A7DA20C6113FECD400BF472F /* phylotypecommand.cpp */, + A7DA20C2113FECD400BF472F /* pcacommand.cpp */, + A72B3A62118B37FD004B9F8D /* phylodiversitycommand.h */, + A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */, A7DA20C7113FECD400BF472F /* phylotypecommand.h */, - A7DA20CA113FECD400BF472F /* preclustercommand.cpp */, + A7DA20C6113FECD400BF472F /* phylotypecommand.cpp */, A7DA20CB113FECD400BF472F /* preclustercommand.h */, - A7DA20D0113FECD400BF472F /* quitcommand.cpp */, + A7DA20CA113FECD400BF472F /* preclustercommand.cpp */, A7DA20D1113FECD400BF472F /* quitcommand.h */, - A7DA20DA113FECD400BF472F /* rarefactcommand.cpp */, + A7DA20D0113FECD400BF472F /* quitcommand.cpp */, A7DA20DB113FECD400BF472F /* rarefactcommand.h */, - A7DA20DD113FECD400BF472F /* rarefactsharedcommand.cpp */, + A7DA20DA113FECD400BF472F /* rarefactcommand.cpp */, A7DA20DE113FECD400BF472F /* rarefactsharedcommand.h */, - A7DA20E5113FECD400BF472F /* readdistcommand.cpp */, + A7DA20DD113FECD400BF472F /* rarefactsharedcommand.cpp */, A7DA20E6113FECD400BF472F /* readdistcommand.h */, - A7DA20EA113FECD400BF472F /* readotucommand.cpp */, + A7DA20E5113FECD400BF472F /* readdistcommand.cpp */, A7DA20EB113FECD400BF472F /* readotucommand.h */, - A7DA20F0113FECD400BF472F /* readtreecommand.cpp */, + A7DA20EA113FECD400BF472F /* readotucommand.cpp */, A7DA20F1113FECD400BF472F /* readtreecommand.h */, - A7DA20F2113FECD400BF472F /* removeseqscommand.cpp */, + A7DA20F0113FECD400BF472F /* readtreecommand.cpp */, A7DA20F3113FECD400BF472F /* removeseqscommand.h */, + A7DA20F2113FECD400BF472F /* removeseqscommand.cpp */, A7DA20F5113FECD400BF472F /* reversecommand.h */, A7DA20F4113FECD400BF472F /* reversecommand.cpp */, A7DA20F9113FECD400BF472F /* screenseqscommand.h */, diff --git a/clustercommand.cpp b/clustercommand.cpp index 6f6e309..9ed67e0 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -22,7 +22,7 @@ ClusterCommand::ClusterCommand(string option) { else { //valid paramters for this command - string Array[] = {"cutoff","precision","method","showabund","timing","outputdir","inputdir"}; + string Array[] = {"cutoff","precision","method","showabund","timing","hard","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -56,10 +56,13 @@ ClusterCommand::ClusterCommand(string option) { length = temp.length(); convert(temp, precision); + temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; } + hard = isTrue(temp); + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } convert(temp, cutoff); - cutoff += (5 / (precision * 10.0)); + if (!hard) { cutoff += (5 / (precision * 10.0)); } method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "furthest"; } @@ -114,7 +117,7 @@ ClusterCommand::ClusterCommand(string option) { void ClusterCommand::help(){ try { m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n"); - m->mothurOut("The cluster command parameter options are method, cuttoff, precision, showabund and timing. No parameters are required.\n"); + m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n"); m->mothurOut("The cluster command should be in the following format: \n"); m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n"); m->mothurOut("The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed.\n\n"); diff --git a/clustercommand.h b/clustercommand.h index 5c57b69..c8e0174 100644 --- a/clustercommand.h +++ b/clustercommand.h @@ -43,7 +43,7 @@ private: RAbundVector oldRAbund; ListVector oldList; - bool abort; + bool abort, hard; string method, fileroot, tag, outputDir; double cutoff; diff --git a/commandfactory.cpp b/commandfactory.cpp index 32d4211..30b76f8 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -72,6 +72,7 @@ #include "chimerapintailcommand.h" #include "chimerabellerophoncommand.h" #include "setlogfilecommand.h" +#include "phylodiversitycommand.h" /*******************************************************/ @@ -151,6 +152,7 @@ CommandFactory::CommandFactory(){ commands["parse.list"] = "parse.list"; commands["parse.sff"] = "parse.sff"; commands["set.logfile"] = "set.logfile"; + commands["phylo.diversity"] = "phylo.diversity"; commands["classify.seqs"] = "MPIEnabled"; commands["dist.seqs"] = "MPIEnabled"; commands["filter.seqs"] = "MPIEnabled"; @@ -267,6 +269,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); } else if(commandName == "parse.list") { command = new ParseListCommand(optionString); } else if(commandName == "parse.sff") { command = new ParseSFFCommand(optionString); } + else if(commandName == "phylo.diversity") { command = new PhyloDiversityCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/hclustercommand.cpp b/hclustercommand.cpp index f981908..89c6dbd 100644 --- a/hclustercommand.cpp +++ b/hclustercommand.cpp @@ -21,7 +21,7 @@ HClusterCommand::HClusterCommand(string option) { else { //valid paramters for this command - string Array[] = {"cutoff","precision","method","phylip","column","name","sorted","showabund","timing","outputdir","inputdir"}; + string Array[] = {"cutoff","hard","precision","method","phylip","column","name","sorted","showabund","timing","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -104,10 +104,13 @@ HClusterCommand::HClusterCommand(string option) { length = temp.length(); convert(temp, precision); + temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; } + hard = isTrue(temp); + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } convert(temp, cutoff); - cutoff += (5 / (precision * 10.0)); + if (!hard) { cutoff += (5 / (precision * 10.0)); } method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "furthest"; } diff --git a/hclustercommand.h b/hclustercommand.h index 8bbc655..1aae49d 100644 --- a/hclustercommand.h +++ b/hclustercommand.h @@ -46,7 +46,7 @@ private: ListVector oldList; ReadCluster* read; - bool abort, sorted, print_start; + bool abort, sorted, print_start, hard; string method, fileroot, tag, distfile, format, phylipfile, columnfile, namefile, sort, showabund, timing, outputDir; double cutoff; int precision, length; diff --git a/makefile b/makefile index b7bb8d0..a868ec5 100644 --- a/makefile +++ b/makefile @@ -219,7 +219,9 @@ mothur : \ ./classifyseqscommand.o\ ./parsesffcommand.o\ ./classify.o\ - ./phylotree.o\ + ./phylotree.o\ + ./phylodiversity.o\ + ./phylodiversitycommand.o\ ./bayesian.o\ ./phylosummary.o\ ./alignmentdb.o\ @@ -418,7 +420,9 @@ mothur : \ ./classifyseqscommand.o\ ./parsesffcommand.o\ ./classify.o\ - ./phylotree.o\ + ./phylotree.o\ + ./phylodiversity.o\ + ./phylodiversitycommand.o\ ./bayesian.o\ ./phylosummary.o\ ./alignmentdb.o\ @@ -620,7 +624,9 @@ clean : ./classifyseqscommand.o\ ./parsesffcommand.o\ ./classify.o\ - ./phylotree.o\ + ./phylotree.o\ + ./phylodiversity.o\ + ./phylodiversitycommand.o\ ./bayesian.o\ ./phylosummary.o\ ./alignmentdb.o\ @@ -1635,6 +1641,14 @@ install : mothur ./setlogfilecommand.o : setlogfilecommand.cpp $(CC) $(CC_OPTIONS) setlogfilecommand.cpp -c $(INCLUDE) -o ./setlogfilecommand.o +# Item # 199 -- phylodiversity -- +./phylodiversity.o : phylodiversity.cpp + $(CC) $(CC_OPTIONS) phylodiversity.cpp -c $(INCLUDE) -o ./phylodiversity.o + +# Item # 200 -- phylodiversitycommand -- +./phylodiversitycommand.o : phylodiversitycommand.cpp + $(CC) $(CC_OPTIONS) phylodiversitycommand.cpp -c $(INCLUDE) -o ./phylodiversitycommand.o + ##### END RUN #### diff --git a/phylodiversity.cpp b/phylodiversity.cpp new file mode 100644 index 0000000..96d49d4 --- /dev/null +++ b/phylodiversity.cpp @@ -0,0 +1,141 @@ +/* + * phylodiversity.cpp + * Mothur + * + * Created by westcott on 4/30/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "phylodiversity.h" + +/**************************************************************************************************/ +EstOutput PhyloDiversity::getValues(Tree* t, vector treeNodes) { + try { + + map DScore; + float totalLength = 0.0; + data.clear(); + + //initialize Dscore + for (int i=0; iGroups.size(); i++) { DScore[globaldata->Groups[i]] = 0.0; } + + /********************************************************/ + //calculate a D value for each group + for(int v=0;vcontrol_pressed) { return data; } + + //is this node from a sequence which is in one of the users groups + if (inUsersGroups(t->tree[treeNodes[v]].getGroup(), globaldata->Groups) == true) { + + //calc the branch length + //while you aren't at root + float sum = 0.0; + int index = treeNodes[v]; + + while(t->tree[index].getParent() != -1){ + + //if you have a BL + if(t->tree[index].getBranchLength() != -1){ + sum += abs(t->tree[index].getBranchLength()); + } + index = t->tree[index].getParent(); + } + + //get last breanch length added + if(t->tree[index].getBranchLength() != -1){ + sum += abs(t->tree[index].getBranchLength()); + } + + //for each group in the groups update the total branch length accounting for the names file + vector groups = t->tree[treeNodes[v]].getGroup(); + for (int j = 0; j < groups.size(); j++) { + int numSeqsInGroupJ = 0; + map::iterator it; + it = t->tree[treeNodes[v]].pcount.find(groups[j]); + if (it != t->tree[treeNodes[v]].pcount.end()) { //this leaf node contains seqs from group j + numSeqsInGroupJ = it->second; + } + + //add branch length to total for group + DScore[groups[j]] += (numSeqsInGroupJ * sum); + } + } + } + + + for (int i=0; iGroups.size(); i++) { + if (groupTotals[globaldata->Groups[i]] != 0.0) { //avoid divide by zero error + float percent = DScore[globaldata->Groups[i]] / groupTotals[globaldata->Groups[i]]; + data.push_back(percent); + }else { data.push_back(0.0); } + } + + return data; + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversity", "getValues"); + exit(1); + } +} +/**************************************************************************************************/ +void PhyloDiversity::setTotalGroupBranchLengths(Tree* t) { + try { + + groupTotals.clear(); + + //initialize group totals + for (int i=0; iGroups.size(); i++) { groupTotals[globaldata->Groups[i]] = 0.0; } + + + /********************************************************/ + //calculate a D value for each group + for(int v=0;vgetNumLeaves();v++){ + + //is this node from a sequence which is in one of the users groups + if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) { + + //calc the branch length + int index = v; + float sum = 0.0; + + while(t->tree[index].getParent() != -1){ //while you aren't at root + + //if you have a BL + if(t->tree[index].getBranchLength() != -1){ + sum += abs(t->tree[index].getBranchLength()); + } + index = t->tree[index].getParent(); + } + + //get last breanch length added + if(t->tree[index].getBranchLength() != -1){ + sum += abs(t->tree[index].getBranchLength()); + } + + //account for the names file + vector groups = t->tree[v].getGroup(); + for (int j = 0; j < groups.size(); j++) { + int numSeqsInGroupJ = 0; + map::iterator it; + it = t->tree[v].pcount.find(groups[j]); + if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group j + numSeqsInGroupJ = it->second; + } + + //add branch length to total for group + groupTotals[groups[j]] += (numSeqsInGroupJ * sum); + }//end for + }//end if + }//end for + + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversity", "setTotalGroupBranchLengths"); + exit(1); + } +} +/**************************************************************************************************/ + + diff --git a/phylodiversity.h b/phylodiversity.h new file mode 100644 index 0000000..131a63c --- /dev/null +++ b/phylodiversity.h @@ -0,0 +1,43 @@ +#ifndef PHYLODIVERSITY_H +#define PHYLODIVERSITY_H + + +/* + * phylodiversity.h + * Mothur + * + * Created by westcott on 4/30/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "treemap.h" +#include "globaldata.hpp" +#include "mothurout.h" + +typedef vector EstOutput; + +/***********************************************************************/ + +class PhyloDiversity { + + public: + PhyloDiversity(TreeMap* t) : tmap(t) { globaldata = GlobalData::getInstance(); m = MothurOut::getInstance(); } + ~PhyloDiversity() {}; + + EstOutput getValues(Tree*, vector); + void setTotalGroupBranchLengths(Tree*); + + private: + GlobalData* globaldata; + MothurOut* m; + EstOutput data; + TreeMap* tmap; + map groupTotals; +}; + +/***********************************************************************/ + + +#endif + diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp new file mode 100644 index 0000000..978b188 --- /dev/null +++ b/phylodiversitycommand.cpp @@ -0,0 +1,223 @@ +/* + * phylodiversitycommand.cpp + * Mothur + * + * Created by westcott on 4/30/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "phylodiversitycommand.h" +#include "phylodiversity.h" + +//********************************************************************************************************************** +PhyloDiversityCommand::PhyloDiversityCommand(string option) { + try { + globaldata = GlobalData::getInstance(); + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"freq","rarefy","iters","groups","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + if (globaldata->gTree.size() == 0) {//no trees were read + m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true; } + + string temp; + temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } + convert(temp, freq); + + temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, iters); + + temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; } + rarefy = isTrue(temp); + if (!rarefy) { iters = 1; } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups; globaldata->Groups = Groups; } + else { + splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + } + + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void PhyloDiversityCommand::help(){ + try { + + + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +PhyloDiversityCommand::~PhyloDiversityCommand(){} + +//********************************************************************************************************************** + +int PhyloDiversityCommand::execute(){ + try { + + if (abort == true) { return 0; } + + //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed + for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i); break; } } + + vector outputNames; + + //diversity calculator + PhyloDiversity phylo(globaldata->gTreemap); + + vector trees = globaldata->gTree; + + //for each of the users trees + for(int i = 0; i < trees.size(); i++) { + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + phylo.setTotalGroupBranchLengths(trees[i]); + + string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylo.diversity"; + if (rarefy) { outFile += ".rarefaction"; } + outputNames.push_back(outFile); + + int numLeafNodes = trees[i]->getNumLeaves(); + + //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator + vector randomLeaf; + for (int j = 0; j < numLeafNodes; j++) { + if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected. + randomLeaf.push_back(j); + } + } + + numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using + + //each group, each sampling, if no rarefy iters = 1; + vector< vector > diversity; + diversity.resize(globaldata->Groups.size()); + + //initialize sampling spots + vector numSampledList; + for(int k = 0; k < numLeafNodes; k++){ if((k == 0) || (k+1) % freq == 0){ numSampledList.push_back(k); } } + if(numLeafNodes % freq != 0){ numSampledList.push_back(numLeafNodes); } + + //initialize diversity + for (int j = 0; j < diversity.size(); j++) { diversity[j].resize(numSampledList.size(), 0.0); } // 10sampled 20 sampled ... + //groupA 0.0 0.0 + //then for each iter you add to score and then when printing divide by iters to get average + for (int l = 0; l < iters; l++) { + random_shuffle(randomLeaf.begin(), randomLeaf.end()); + + vector leavesSampled; + EstOutput data; + int count = 0; + for(int k = 0; k < numLeafNodes; k++){ + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + leavesSampled.push_back(randomLeaf[k]); + + if((k == 0) || (k+1) % freq == 0){ //ready to calc? + + data = phylo.getValues(trees[i], leavesSampled); + + //datas results are in the same order as globaldatas groups + for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; } + + count++; + } + } + + if(numLeafNodes % freq != 0){ + + data = phylo.getValues(trees[i], leavesSampled); + + //datas results are in the same order as globaldatas groups + for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; } + } + } + + printData(numSampledList, diversity, outFile); + + } + + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + + return 0; + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** + +void PhyloDiversityCommand::printData(vector& num, vector< vector >& div, string file){ + try { + ofstream out; + openOutputFile(file, out); + + out << "numSampled\t"; + for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t'; } + out << endl; + + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + + for (int i = 0; i < num.size(); i++) { + if (i == (num.size()-1)) { out << num[i] << '\t'; } + else { out << (num[i]+1) << '\t'; } + + for (int j = 0; j < div.size(); j++) { + float score = div[j][i] / (float)iters; + out << setprecision(6) << score << '\t'; + } + out << endl; + } + + out.close(); + + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "printData"); + exit(1); + } +} + +//********************************************************************************************************************** diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h new file mode 100644 index 0000000..bd26173 --- /dev/null +++ b/phylodiversitycommand.h @@ -0,0 +1,37 @@ +#ifndef PHYLODIVERSITYCOMMAND_H +#define PHYLODIVERSITYCOMMAND_H + +/* + * phylodiversitycommand.h + * Mothur + * + * Created by westcott on 4/30/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "treemap.h" +#include "globaldata.hpp" + +class PhyloDiversityCommand : public Command { + + public: + PhyloDiversityCommand(string); + ~PhyloDiversityCommand(); + int execute(); + void help(); + + private: + GlobalData* globaldata; + + int iters, freq; + bool abort, rarefy; + string groups, outputDir; + vector Groups, outputNames; //holds groups to be used, and outputFile names + + void printData(vector&, vector< vector >&, string); +}; + +#endif + diff --git a/readdistcommand.cpp b/readdistcommand.cpp index 7f60d71..414f330 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -22,7 +22,7 @@ ReadDistCommand::ReadDistCommand(string option) { else { //valid paramters for this command - string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"}; + string Array[] = {"phylip", "column", "name", "cutoff","hard", "precision", "group","outputdir","inputdir","sim"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -126,9 +126,12 @@ ReadDistCommand::ReadDistCommand(string option) { sim = isTrue(temp); globaldata->sim = sim; + temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; } + hard = isTrue(temp); + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } convert(temp, cutoff); - cutoff += (5 / (precision * 10.0)); + if (!hard) { cutoff += (5 / (precision * 10.0)); } if (abort == false) { distFileName = globaldata->inputFileName; diff --git a/readdistcommand.h b/readdistcommand.h index 937ca3f..6241f97 100644 --- a/readdistcommand.h +++ b/readdistcommand.h @@ -42,7 +42,7 @@ private: string phylipfile, columnfile, namefile, groupfile, outputDir; NameAssignment* nameMap; - bool abort, sim; + bool abort, sim, hard; }; diff --git a/readtreecommand.cpp b/readtreecommand.cpp index 6065448..1762983 100644 --- a/readtreecommand.cpp +++ b/readtreecommand.cpp @@ -180,6 +180,8 @@ int ReadTreeCommand::execute(){ } } } + + globaldata->gTreemap = treeMap; } return 0; diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp index 43ec945..a61e467 100644 --- a/removeseqscommand.cpp +++ b/removeseqscommand.cpp @@ -125,13 +125,13 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option) { if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport or list."); m->mothurOutEndLine(); abort = true; } - int okay = 2; - if (outputDir != "") { okay++; } - if (usedDups != "") { okay++; } + //int okay = 2; + //if (outputDir != "") { okay++; } + //if (usedDups != "") { okay++; } if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; } - if (parameters.size() > okay) { m->mothurOut("You may only enter one of the following: fasta, name, group, alignreport, or list."); m->mothurOutEndLine(); abort = true; } + //if (parameters.size() > okay) { m->mothurOut("You may only enter one of the following: fasta, name, group, alignreport, or list."); m->mothurOutEndLine(); abort = true; } } } @@ -172,10 +172,10 @@ int RemoveSeqsCommand::execute(){ //read through the correct file and output lines you want to keep if (fastafile != "") { readFasta(); } - else if (namefile != "") { readName(); } - else if (groupfile != "") { readGroup(); } - else if (alignfile != "") { readAlign(); } - else if (listfile != "") { readList(); } + if (namefile != "") { readName(); } + if (groupfile != "") { readGroup(); } + if (alignfile != "") { readAlign(); } + if (listfile != "") { readList(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } @@ -221,7 +221,7 @@ int RemoveSeqsCommand::readFasta(){ wroteSomething = true; currSeq.printSequence(out); - }else { names.erase(name); } + }//else { names.erase(name); } } gobble(in); } @@ -435,7 +435,7 @@ int RemoveSeqsCommand::readGroup(){ if (names.count(name) == 0) { wroteSomething = true; out << name << '\t' << group << endl; - }else { names.erase(name); } + }//else { names.erase(name); } gobble(in); } @@ -496,7 +496,7 @@ int RemoveSeqsCommand::readAlign(){ out << endl; }else {//still read just don't do anything with it - names.erase(name); + //names.erase(name); //read rest for (int i = 0; i < 15; i++) { diff --git a/tree.cpp b/tree.cpp index 7da3cef..b6cfa0b 100644 --- a/tree.cpp +++ b/tree.cpp @@ -70,10 +70,11 @@ void Tree::addNamesToCounts() { //go through each leaf and update its pcounts and pgroups for (int i = 0; i < numLeaves; i++) { + string name = tree[i].getName(); - + map::iterator itNames = globaldata->names.find(name); - + if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); } else { vector dupNames; @@ -131,8 +132,7 @@ void Tree::addNamesToCounts() { tree[i].setGroup(nodeGroups); }//end else - }//end for - + }//end for } catch(exception& e) { m->errorOut(e, "Tree", "addNamesToCounts");