2 * phylodiversitycommand.cpp
5 * Created by westcott on 4/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "phylodiversitycommand.h"
11 #include "phylodiversity.h"
13 //**********************************************************************************************************************
14 PhyloDiversityCommand::PhyloDiversityCommand(string option) {
16 globaldata = GlobalData::getInstance();
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"freq","rarefy","iters","groups","outputdir","inputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string,string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //if the user changes the output directory command factory will send this info to us in the output parameter
38 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
40 if (globaldata->gTree.size() == 0) {//no trees were read
41 m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true; }
44 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
47 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
50 temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
51 rarefy = isTrue(temp);
52 if (!rarefy) { iters = 1; }
54 groups = validParameter.validFile(parameters, "groups", false);
55 if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups; globaldata->Groups = Groups; }
57 splitAtDash(groups, Groups);
58 globaldata->Groups = Groups;
64 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
68 //**********************************************************************************************************************
70 void PhyloDiversityCommand::help(){
76 m->errorOut(e, "PhyloDiversityCommand", "help");
81 //**********************************************************************************************************************
83 PhyloDiversityCommand::~PhyloDiversityCommand(){}
85 //**********************************************************************************************************************
87 int PhyloDiversityCommand::execute(){
90 if (abort == true) { return 0; }
92 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
93 for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i); break; } }
95 vector<string> outputNames;
97 //diversity calculator
98 PhyloDiversity phylo(globaldata->gTreemap);
100 vector<Tree*> trees = globaldata->gTree;
102 //for each of the users trees
103 for(int i = 0; i < trees.size(); i++) {
105 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
107 phylo.setTotalGroupBranchLengths(trees[i]);
109 string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylo.diversity";
110 if (rarefy) { outFile += ".rarefaction"; }
111 outputNames.push_back(outFile);
113 int numLeafNodes = trees[i]->getNumLeaves();
115 //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
116 vector<int> randomLeaf;
117 for (int j = 0; j < numLeafNodes; j++) {
118 if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
119 randomLeaf.push_back(j);
123 numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
125 //each group, each sampling, if no rarefy iters = 1;
126 vector< vector<float> > diversity;
127 diversity.resize(globaldata->Groups.size());
129 //initialize sampling spots
130 vector<int> numSampledList;
131 for(int k = 0; k < numLeafNodes; k++){ if((k == 0) || (k+1) % freq == 0){ numSampledList.push_back(k); } }
132 if(numLeafNodes % freq != 0){ numSampledList.push_back(numLeafNodes); }
134 //initialize diversity
135 for (int j = 0; j < diversity.size(); j++) { diversity[j].resize(numSampledList.size(), 0.0); } // 10sampled 20 sampled ...
137 //then for each iter you add to score and then when printing divide by iters to get average
138 for (int l = 0; l < iters; l++) {
139 random_shuffle(randomLeaf.begin(), randomLeaf.end());
141 vector<int> leavesSampled;
144 for(int k = 0; k < numLeafNodes; k++){
146 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
148 leavesSampled.push_back(randomLeaf[k]);
150 if((k == 0) || (k+1) % freq == 0){ //ready to calc?
152 data = phylo.getValues(trees[i], leavesSampled);
154 //datas results are in the same order as globaldatas groups
155 for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; }
161 if(numLeafNodes % freq != 0){
163 data = phylo.getValues(trees[i], leavesSampled);
165 //datas results are in the same order as globaldatas groups
166 for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; }
170 printData(numSampledList, diversity, outFile);
175 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
177 m->mothurOutEndLine();
178 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
179 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
180 m->mothurOutEndLine();
185 catch(exception& e) {
186 m->errorOut(e, "PhyloDiversityCommand", "execute");
190 //**********************************************************************************************************************
192 void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
195 openOutputFile(file, out);
197 out << "numSampled\t";
198 for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t'; }
201 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
203 for (int i = 0; i < num.size(); i++) {
204 if (i == (num.size()-1)) { out << num[i] << '\t'; }
205 else { out << (num[i]+1) << '\t'; }
207 for (int j = 0; j < div.size(); j++) {
208 float score = div[j][i] / (float)iters;
209 out << setprecision(6) << score << '\t';
217 catch(exception& e) {
218 m->errorOut(e, "PhyloDiversityCommand", "printData");
223 //**********************************************************************************************************************