2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
11 #include "phylodiversity.h"
13 //**********************************************************************************************************************
14 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
16 globaldata = GlobalData::getInstance();
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"freq","rarefy","iters","groups","outputdir","inputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string,string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //if the user changes the output directory command factory will send this info to us in the output parameter
38 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
40 if (globaldata->gTree.size() == 0) {//no trees were read
41 m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true; }
44 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
47 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
50 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
51 rarefy = isTrue(temp);
52 if (!rarefy) { iters = 1; }
54 groups = validParameter.validFile(parameters, "groups", false);
55 if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups; globaldata->Groups = Groups; }
57 splitAtDash(groups, Groups);
58 globaldata->Groups = Groups;
64 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
68 //**********************************************************************************************************************
70 void PhyloDiversityCommand::help(){
72 m->mothurOut("The phylo.diversity command can only be executed after a successful read.tree command.\n");
73 m->mothurOut("The phylo.diversity command parameters are groups, iters, freq and rarefy. No parameters are required.\n");
74 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n");
75 m->mothurOut("The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n");
76 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
77 m->mothurOut("The rarefy parameter allows you to create a rarefaction curve. The default is false.\n");
78 m->mothurOut("The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n");
79 m->mothurOut("Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n");
80 m->mothurOut("The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n");
81 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
85 m->errorOut(e, "PhyloDiversityCommand", "help");
90 //**********************************************************************************************************************
92 PhyloDiversityCommand::~PhyloDiversityCommand(){}
94 //**********************************************************************************************************************
96 int PhyloDiversityCommand::execute(){
99 if (abort == true) { return 0; }
101 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
102 for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i); break; } }
104 vector<string> outputNames;
106 //diversity calculator
107 PhyloDiversity phylo(globaldata->gTreemap);
109 vector<Tree*> trees = globaldata->gTree;
111 //for each of the users trees
112 for(int i = 0; i < trees.size(); i++) {
114 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
116 phylo.setTotalGroupBranchLengths(trees[i]);
118 string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylo.diversity";
119 if (rarefy) { outFile += ".rarefaction"; }
120 outputNames.push_back(outFile);
122 int numLeafNodes = trees[i]->getNumLeaves();
124 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
125 vector<int> randomLeaf;
126 for (int j = 0; j < numLeafNodes; j++) {
127 if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
128 randomLeaf.push_back(j);
132 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
134 //convert freq percentage to number
136 if (freq < 1.0) { increment = numLeafNodes * freq; }
137 else { increment = freq; }
139 //each group, each sampling, if no rarefy iters = 1;
140 vector< vector<float> > diversity;
141 diversity.resize(globaldata->Groups.size());
143 //initialize sampling spots
144 vector<int> numSampledList;
145 for(int k = 0; k < numLeafNodes; k++){ if((k == 0) || (k+1) % increment == 0){ numSampledList.push_back(k); } }
146 if(numLeafNodes % increment != 0){ numSampledList.push_back(numLeafNodes); }
148 //initialize diversity
149 for (int j = 0; j < diversity.size(); j++) { diversity[j].resize(numSampledList.size(), 0.0); } // 10sampled 20 sampled ...
151 //then for each iter you add to score and then when printing divide by iters to get average
152 for (int l = 0; l < iters; l++) {
153 random_shuffle(randomLeaf.begin(), randomLeaf.end());
155 vector<int> leavesSampled;
158 for(int k = 0; k < numLeafNodes; k++){
160 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
162 leavesSampled.push_back(randomLeaf[k]);
164 if((k == 0) || (k+1) % increment == 0){ //ready to calc?
166 data = phylo.getValues(trees[i], leavesSampled);
168 //datas results are in the same order as globaldatas groups
169 for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; }
175 if(numLeafNodes % increment != 0){
177 data = phylo.getValues(trees[i], leavesSampled);
179 //datas results are in the same order as globaldatas groups
180 for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; }
184 printData(numSampledList, diversity, outFile);
189 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
191 m->mothurOutEndLine();
192 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
193 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
194 m->mothurOutEndLine();
199 catch(exception& e) {
200 m->errorOut(e, "PhyloDiversityCommand", "execute");
204 //**********************************************************************************************************************
206 void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
209 openOutputFile(file, out);
211 out << "numSampled\t";
212 for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t'; }
215 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
217 for (int i = 0; i < num.size(); i++) {
218 if (i == (num.size()-1)) { out << num[i] << '\t'; }
219 else { out << (num[i]+1) << '\t'; }
221 for (int j = 0; j < div.size(); j++) {
222 float score = div[j][i] / (float)iters;
223 out << setprecision(6) << score << '\t';
231 catch(exception& e) {
232 m->errorOut(e, "PhyloDiversityCommand", "printData");
237 //**********************************************************************************************************************