]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
1.10.0
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "phylodiversity.h"
12
13 //**********************************************************************************************************************
14 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"freq","rarefy","iters","groups","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the output directory command factory will send this info to us in the output parameter 
38                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
39                         
40                         if (globaldata->gTree.size() == 0) {//no trees were read
41                                 m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true;  }
42
43                         string temp;
44                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
45                         convert(temp, freq); 
46                         
47                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
48                         convert(temp, iters); 
49                         
50                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
51                         rarefy = isTrue(temp);
52                         if (!rarefy) { iters = 1;  }
53                         
54                         groups = validParameter.validFile(parameters, "groups", false);                 
55                         if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups;  globaldata->Groups = Groups;  }
56                         else { 
57                                 splitAtDash(groups, Groups);
58                                 globaldata->Groups = Groups;
59                         }
60                 }
61                 
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
65                 exit(1);
66         }                       
67 }
68 //**********************************************************************************************************************
69
70 void PhyloDiversityCommand::help(){
71         try {
72                 m->mothurOut("The phylo.diversity command can only be executed after a successful read.tree command.\n");
73                 m->mothurOut("The phylo.diversity command parameters are groups, iters, freq and rarefy.  No parameters are required.\n");
74                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n");
75                 m->mothurOut("The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n");
76                 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
77                 m->mothurOut("The rarefy parameter allows you to create a rarefaction curve. The default is false.\n");
78                 m->mothurOut("The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n");
79                 m->mothurOut("Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n");
80                 m->mothurOut("The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n");
81                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
82
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "PhyloDiversityCommand", "help");
86                 exit(1);
87         }
88 }
89
90 //**********************************************************************************************************************
91
92 PhyloDiversityCommand::~PhyloDiversityCommand(){}
93
94 //**********************************************************************************************************************
95
96 int PhyloDiversityCommand::execute(){
97         try {
98                 
99                 if (abort == true) { return 0; }
100                 
101                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
102                 for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i);  break; }  }
103                  
104                 vector<string> outputNames;
105                 
106                 //diversity calculator
107                 PhyloDiversity phylo(globaldata->gTreemap);
108                 
109                 vector<Tree*> trees = globaldata->gTree;
110                 
111                 //for each of the users trees
112                 for(int i = 0; i < trees.size(); i++) {
113                 
114                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
115                         
116                         phylo.setTotalGroupBranchLengths(trees[i]);
117                         
118                         string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylo.diversity";
119                         if (rarefy) { outFile += ".rarefaction"; }
120                         outputNames.push_back(outFile);
121                         
122                         int numLeafNodes = trees[i]->getNumLeaves();
123                         
124                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
125                         vector<int> randomLeaf;
126                         for (int j = 0; j < numLeafNodes; j++) {  
127                                 if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
128                                         randomLeaf.push_back(j); 
129                                 }
130                         }
131                         
132                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
133                         
134                         //convert freq percentage to number
135                         int increment = 100;
136                         if (freq < 1.0) {  increment = numLeafNodes * freq;  }
137                         else { increment = freq;  }
138                         
139                         //each group, each sampling, if no rarefy iters = 1;
140                         vector< vector<float> > diversity;
141                         diversity.resize(globaldata->Groups.size());
142                         
143                         //initialize sampling spots
144                         vector<int> numSampledList;
145                         for(int k = 0; k < numLeafNodes; k++){  if((k == 0) || (k+1) % increment == 0){  numSampledList.push_back(k); }   }
146                         if(numLeafNodes % increment != 0){      numSampledList.push_back(numLeafNodes);   }
147                         
148                         //initialize diversity
149                         for (int j = 0; j < diversity.size(); j++) {   diversity[j].resize(numSampledList.size(), 0.0);  }  //                  10sampled       20 sampled ...
150                                                                                                                                                                                                                                 //groupA                0.0                     0.0
151                                                                                                                                                                                                                         //then for each iter you add to score and then when printing divide by iters to get average
152                         for (int l = 0; l < iters; l++) {
153                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
154                 
155                                 vector<int> leavesSampled;
156                                 EstOutput data;
157                                 int count = 0;
158                                 for(int k = 0; k < numLeafNodes; k++){
159                                                 
160                                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
161                                         
162                                         leavesSampled.push_back(randomLeaf[k]);
163                                                 
164                                         if((k == 0) || (k+1) % increment == 0){ //ready to calc?
165                                                 
166                                                 data = phylo.getValues(trees[i], leavesSampled);
167                                                 
168                                                 //datas results are in the same order as globaldatas groups
169                                                 for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
170                                                 
171                                                 count++;
172                                         }
173                                 }
174                 
175                                 if(numLeafNodes % increment != 0){      
176                                         
177                                         data = phylo.getValues(trees[i], leavesSampled);
178                                         
179                                         //datas results are in the same order as globaldatas groups
180                                         for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
181                                 }
182                         }
183                         
184                         printData(numSampledList, diversity, outFile);
185
186                 }
187                 
188         
189                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
190
191                 m->mothurOutEndLine();
192                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
193                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
194                 m->mothurOutEndLine();
195
196                 
197                 return 0;
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "PhyloDiversityCommand", "execute");
201                 exit(1);
202         }
203 }
204 //**********************************************************************************************************************
205
206 void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
207         try {
208                 ofstream out;
209                 openOutputFile(file, out);
210                 
211                 out << "numSampled\t";
212                 for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t';  }
213                 out << endl;
214                 
215                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
216                 
217                 for (int i = 0; i < num.size(); i++) {  
218                         if (i == (num.size()-1)) {  out << num[i] << '\t';  }
219                         else {  out << (num[i]+1) << '\t';  }
220                         
221                         for (int j = 0; j < div.size(); j++) {
222                                 float score = div[j][i] / (float)iters;
223                                 out << setprecision(6) << score << '\t';
224                         }
225                         out << endl;
226                 }
227                 
228                 out.close();
229                 
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "PhyloDiversityCommand", "printData");
233                 exit(1);
234         }
235 }
236
237 //**********************************************************************************************************************