]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
added phylo.diversity command. added hard parameter to cluster, hcluster and read...
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "phylodiversity.h"
12
13 //**********************************************************************************************************************
14 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"freq","rarefy","iters","groups","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the output directory command factory will send this info to us in the output parameter 
38                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
39                         
40                         if (globaldata->gTree.size() == 0) {//no trees were read
41                                 m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true;  }
42
43                         string temp;
44                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
45                         convert(temp, freq); 
46                         
47                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
48                         convert(temp, iters); 
49                         
50                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
51                         rarefy = isTrue(temp);
52                         if (!rarefy) { iters = 1;  }
53                         
54                         groups = validParameter.validFile(parameters, "groups", false);                 
55                         if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups;  globaldata->Groups = Groups;  }
56                         else { 
57                                 splitAtDash(groups, Groups);
58                                 globaldata->Groups = Groups;
59                         }
60                 }
61                 
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
65                 exit(1);
66         }                       
67 }
68 //**********************************************************************************************************************
69
70 void PhyloDiversityCommand::help(){
71         try {
72
73
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "PhyloDiversityCommand", "help");
77                 exit(1);
78         }
79 }
80
81 //**********************************************************************************************************************
82
83 PhyloDiversityCommand::~PhyloDiversityCommand(){}
84
85 //**********************************************************************************************************************
86
87 int PhyloDiversityCommand::execute(){
88         try {
89                 
90                 if (abort == true) { return 0; }
91                 
92                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
93                 for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i);  break; }  }
94                  
95                 vector<string> outputNames;
96                 
97                 //diversity calculator
98                 PhyloDiversity phylo(globaldata->gTreemap);
99                 
100                 vector<Tree*> trees = globaldata->gTree;
101                 
102                 //for each of the users trees
103                 for(int i = 0; i < trees.size(); i++) {
104                 
105                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
106                         
107                         phylo.setTotalGroupBranchLengths(trees[i]);
108                         
109                         string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylo.diversity";
110                         if (rarefy) { outFile += ".rarefaction"; }
111                         outputNames.push_back(outFile);
112                         
113                         int numLeafNodes = trees[i]->getNumLeaves();
114                         
115                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
116                         vector<int> randomLeaf;
117                         for (int j = 0; j < numLeafNodes; j++) {  
118                                 if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
119                                         randomLeaf.push_back(j); 
120                                 }
121                         }
122                         
123                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
124                         
125                         //each group, each sampling, if no rarefy iters = 1;
126                         vector< vector<float> > diversity;
127                         diversity.resize(globaldata->Groups.size());
128                         
129                         //initialize sampling spots
130                         vector<int> numSampledList;
131                         for(int k = 0; k < numLeafNodes; k++){  if((k == 0) || (k+1) % freq == 0){  numSampledList.push_back(k); }   }
132                         if(numLeafNodes % freq != 0){   numSampledList.push_back(numLeafNodes);   }
133                         
134                         //initialize diversity
135                         for (int j = 0; j < diversity.size(); j++) {   diversity[j].resize(numSampledList.size(), 0.0);  }  //                  10sampled       20 sampled ...
136                                                                                                                                                                                                                                 //groupA                0.0                     0.0
137                                                                                                                                                                                                                         //then for each iter you add to score and then when printing divide by iters to get average
138                         for (int l = 0; l < iters; l++) {
139                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
140                 
141                                 vector<int> leavesSampled;
142                                 EstOutput data;
143                                 int count = 0;
144                                 for(int k = 0; k < numLeafNodes; k++){
145                                                 
146                                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
147                                         
148                                         leavesSampled.push_back(randomLeaf[k]);
149                                                 
150                                         if((k == 0) || (k+1) % freq == 0){ //ready to calc?
151                                                 
152                                                 data = phylo.getValues(trees[i], leavesSampled);
153                                                 
154                                                 //datas results are in the same order as globaldatas groups
155                                                 for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
156                                                 
157                                                 count++;
158                                         }
159                                 }
160                 
161                                 if(numLeafNodes % freq != 0){   
162                                         
163                                         data = phylo.getValues(trees[i], leavesSampled);
164                                         
165                                         //datas results are in the same order as globaldatas groups
166                                         for (int h = 0; h < data.size(); h++) {  diversity[h][count] += data[h];  }
167                                 }
168                         }
169                         
170                         printData(numSampledList, diversity, outFile);
171
172                 }
173                 
174         
175                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
176
177                 m->mothurOutEndLine();
178                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
179                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
180                 m->mothurOutEndLine();
181
182                 
183                 return 0;
184         }
185         catch(exception& e) {
186                 m->errorOut(e, "PhyloDiversityCommand", "execute");
187                 exit(1);
188         }
189 }
190 //**********************************************************************************************************************
191
192 void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
193         try {
194                 ofstream out;
195                 openOutputFile(file, out);
196                 
197                 out << "numSampled\t";
198                 for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t';  }
199                 out << endl;
200                 
201                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
202                 
203                 for (int i = 0; i < num.size(); i++) {  
204                         if (i == (num.size()-1)) {  out << num[i] << '\t';  }
205                         else {  out << (num[i]+1) << '\t';  }
206                         
207                         for (int j = 0; j < div.size(); j++) {
208                                 float score = div[j][i] / (float)iters;
209                                 out << setprecision(6) << score << '\t';
210                         }
211                         out << endl;
212                 }
213                 
214                 out.close();
215                 
216         }
217         catch(exception& e) {
218                 m->errorOut(e, "PhyloDiversityCommand", "printData");
219                 exit(1);
220         }
221 }
222
223 //**********************************************************************************************************************