]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
modified freq parameter be a percentage of numSeqs, added catchall command - not...
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "phylodiversity.h"
12
13 //**********************************************************************************************************************
14 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"freq","rarefy","iters","groups","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the output directory command factory will send this info to us in the output parameter 
38                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
39                         
40                         if (globaldata->gTree.size() == 0) {//no trees were read
41                                 m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true;  }
42
43                         string temp;
44                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "0.10"; }
45                         convert(temp, freq); 
46                         
47                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
48                         convert(temp, iters); 
49                         
50                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
51                         rarefy = isTrue(temp);
52                         if (!rarefy) { iters = 1;  }
53                         
54                         groups = validParameter.validFile(parameters, "groups", false);                 
55                         if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups;  globaldata->Groups = Groups;  }
56                         else { 
57                                 splitAtDash(groups, Groups);
58                                 globaldata->Groups = Groups;
59                         }
60                 }
61                 
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
65                 exit(1);
66         }                       
67 }
68 //**********************************************************************************************************************
69
70 void PhyloDiversityCommand::help(){
71         try {
72                 m->mothurOut("The phylo.diversity command can only be executed after a successful read.tree command.\n");
73                 m->mothurOut("The phylo.diversity command parameters are groups, iters, freq and rarefy.  No parameters are required.\n");
74                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n");
75                 m->mothurOut("The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n");
76                 m->mothurOut("The freq parameter is used indicate when to output your data.  It is a percentage of the number of sequences.  By default it is set to 0.10, meaning 10%. \n");
77                 m->mothurOut("The rarefy parameter allows you to create a rarefaction curve. The default is false.\n");
78                 m->mothurOut("The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n");
79                 m->mothurOut("Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n");
80                 m->mothurOut("The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n");
81                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
82
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "PhyloDiversityCommand", "help");
86                 exit(1);
87         }
88 }
89
90 //**********************************************************************************************************************
91
92 PhyloDiversityCommand::~PhyloDiversityCommand(){}
93
94 //**********************************************************************************************************************
95
96 int PhyloDiversityCommand::execute(){
97         try {
98                 
99                 if (abort == true) { return 0; }
100                 
101                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
102                 for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i);  break; }  }
103                  
104                 vector<string> outputNames;
105                 
106                 //diversity calculator
107                 PhyloDiversity phylo(globaldata->gTreemap);
108                 
109                 vector<Tree*> trees = globaldata->gTree;
110                 
111                 //for each of the users trees
112                 for(int i = 0; i < trees.size(); i++) {
113                 
114                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
115                         
116                         phylo.setTotalGroupBranchLengths(trees[i]);
117                         
118                         string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylo.diversity";
119                         if (rarefy) { outFile += ".rarefaction"; }
120                         outputNames.push_back(outFile);
121                         
122                         int numLeafNodes = trees[i]->getNumLeaves();
123                         
124                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
125                         vector<int> randomLeaf;
126                         for (int j = 0; j < numLeafNodes; j++) {  
127                                 if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
128                                         randomLeaf.push_back(j); 
129                                 }
130                         }
131                         
132                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
133                         
134                         //convert freq percentage to number
135                         int increment = numLeafNodes * freq;
136                         
137                         //each group, each sampling, if no rarefy iters = 1;
138                         vector< vector<float> > diversity;
139                         diversity.resize(globaldata->Groups.size());
140                         
141                         //initialize sampling spots
142                         vector<int> numSampledList;
143                         for(int k = 0; k < numLeafNodes; k++){  if((k == 0) || (k+1) % increment == 0){  numSampledList.push_back(k); }   }
144                         if(numLeafNodes % increment != 0){      numSampledList.push_back(numLeafNodes);   }
145                         
146                         //initialize diversity
147                         for (int j = 0; j < diversity.size(); j++) {   diversity[j].resize(numSampledList.size(), 0.0);  }  //                  10sampled       20 sampled ...
148                                                                                                                                                                                                                                 //groupA                0.0                     0.0
149                                                                                                                                                                                                                         //then for each iter you add to score and then when printing divide by iters to get average
150                         for (int l = 0; l < iters; l++) {
151                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
152                 
153                                 vector<int> leavesSampled;
154                                 EstOutput data;
155                                 int count = 0;
156                                 for(int k = 0; k < numLeafNodes; k++){
157                                                 
158                                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
159                                         
160                                         leavesSampled.push_back(randomLeaf[k]);
161                                                 
162                                         if((k == 0) || (k+1) % increment == 0){ //ready to calc?
163                                                 
164                                                 data = phylo.getValues(trees[i], leavesSampled);
165                                                 
166                                                 //datas results are in the same order as globaldatas groups
167                                                 for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
168                                                 
169                                                 count++;
170                                         }
171                                 }
172                 
173                                 if(numLeafNodes % increment != 0){      
174                                         
175                                         data = phylo.getValues(trees[i], leavesSampled);
176                                         
177                                         //datas results are in the same order as globaldatas groups
178                                         for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
179                                 }
180                         }
181                         
182                         printData(numSampledList, diversity, outFile);
183
184                 }
185                 
186         
187                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
188
189                 m->mothurOutEndLine();
190                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
191                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
192                 m->mothurOutEndLine();
193
194                 
195                 return 0;
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "PhyloDiversityCommand", "execute");
199                 exit(1);
200         }
201 }
202 //**********************************************************************************************************************
203
204 void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
205         try {
206                 ofstream out;
207                 openOutputFile(file, out);
208                 
209                 out << "numSampled\t";
210                 for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t';  }
211                 out << endl;
212                 
213                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
214                 
215                 for (int i = 0; i < num.size(); i++) {  
216                         if (i == (num.size()-1)) {  out << num[i] << '\t';  }
217                         else {  out << (num[i]+1) << '\t';  }
218                         
219                         for (int j = 0; j < div.size(); j++) {
220                                 float score = div[j][i] / (float)iters;
221                                 out << setprecision(6) << score << '\t';
222                         }
223                         out << endl;
224                 }
225                 
226                 out.close();
227                 
228         }
229         catch(exception& e) {
230                 m->errorOut(e, "PhyloDiversityCommand", "printData");
231                 exit(1);
232         }
233 }
234
235 //**********************************************************************************************************************