]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
fixed bug in read.tree updates help and validparameters strings, added ability for...
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".uwsummary";
20                 openOutputFile(sumFile, outSum);
21
22                 setGroups(); //sets users groups to analyze
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 unweighted = new Unweighted(tmap);
25
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35 }
36 /***********************************************************/
37 int UnifracUnweightedCommand::execute() {
38         try {
39
40                 userData.resize(numComp,0);  //data[0] = unweightedscore 
41                 randomData.resize(numComp,0); //data[0] = unweightedscore
42                 //create new tree with same num nodes and leaves as users
43                 
44                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t' << "UWSig" <<  endl;
45                 cout << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" << '\t' << "UWSig" <<  endl;
46                 
47                 //get pscores for users trees
48                 for (int i = 0; i < T.size(); i++) {
49                         counter = 0;
50                         unweightedFile = globaldata->getTreeFile()  + toString(i+1) + ".unweighted";
51                         unweightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".unweighted";
52                         
53                         outSum << i+1 << '\t';
54                         cout << i+1 << '\t';
55                         
56                         //get unweighted for users tree
57                         rscoreFreq.resize(numComp);  
58                         rCumul.resize(numComp);  
59                         utreeScores.resize(numComp);  
60                         UWScoreSig.resize(numComp); 
61
62                         userData = unweighted->getValues(T[i]);  //userData[0] = unweightedscore
63                         
64                         //output scores for each combination
65                         for(int k = 0; k < numComp; k++) {
66                                 //saves users score
67                                 utreeScores[k].push_back(userData[k]);
68
69                         }
70                         
71                         //get unweighted scores for random trees
72                         for (int j = 0; j < iters; j++) {
73                                 //we need a different getValues because when we swap the labels we only want to swap those in each parwise comparison
74                                 randomData = unweighted->getValues(T[i], "", "");
75                                 
76                                 for(int k = 0; k < numComp; k++) {      
77                                         //add trees unweighted score to map of scores
78                                         it2 = rscoreFreq[k].find(randomData[k]);
79                                         if (it2 != rscoreFreq[k].end()) {//already have that score
80                                                 rscoreFreq[k][randomData[k]]++;
81                                         }else{//first time we have seen this score
82                                                 rscoreFreq[k][randomData[k]] = 1;
83                                         }
84                                 
85                                         //add randoms score to validscores
86                                         validScores[randomData[k]] = randomData[k];
87                                 }
88                                 
89                         }
90                 
91                 for(int a = 0; a < numComp; a++) {
92                         float rcumul = 1.0000;
93                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
94                         for (it = validScores.begin(); it != validScores.end(); it++) { 
95                                 //make rscoreFreq map and rCumul
96                                 it2 = rscoreFreq[a].find(it->first);
97                                 rCumul[a][it->first] = rcumul;
98                                 //get percentage of random trees with that info
99                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
100                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
101                         }
102                         UWScoreSig[a].push_back(rCumul[a][userData[a]]);
103                 }
104                 
105                 
106                 
107                 printUnweightedFile();
108                 printUWSummaryFile();
109                 
110                 rscoreFreq.clear(); 
111                 rCumul.clear();  
112                 validScores.clear(); 
113                 utreeScores.clear();  
114                 UWScoreSig.clear(); 
115         }
116                 //reset groups parameter
117                 globaldata->Groups.clear(); globaldata->setGroups("");
118                 outSum.close();
119                 
120                 return 0;
121                 
122         }
123         catch(exception& e) {
124                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
125                 exit(1);
126         }
127         catch(...) {
128                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
129                 exit(1);
130         }
131 }
132 /***********************************************************/
133 void UnifracUnweightedCommand::printUnweightedFile() {
134         try {
135                 vector<double> data;
136                 
137                 for(int a = 0; a < numComp; a++) {
138                         initFile(groupComb[a]);
139                         //print each line
140                         for (it = validScores.begin(); it != validScores.end(); it++) { 
141                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
142                                 output(data);
143                                 data.clear();
144                         } 
145                         resetFile();
146                 }
147                 
148                 out.close();
149                 inFile.close();
150                 remove(unweightedFileout.c_str());
151                 
152         }
153         catch(exception& e) {
154                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
155                 exit(1);
156         }
157         catch(...) {
158                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
159                 exit(1);
160         }
161 }
162
163 /***********************************************************/
164 void UnifracUnweightedCommand::printUWSummaryFile() {
165         try {
166                                 
167                 //format output
168                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
169                         
170                 //print each line
171
172                 for(int a = 0; a < numComp; a++) {
173                         if (UWScoreSig[a][0] > (1/(float)iters)) {
174                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << UWScoreSig[a][0] << endl;
175                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << UWScoreSig[a][0] << endl; 
176                         }else {
177                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl;
178                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
179                         }
180                 }
181                 
182         }
183         catch(exception& e) {
184                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
185                 exit(1);
186         }
187         catch(...) {
188                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
189                 exit(1);
190         }
191 }
192 /***********************************************************/
193
194 void UnifracUnweightedCommand::setGroups() {
195         try {
196                 string allGroups = "";
197                 numGroups = 0;
198                 //if the user has not entered specific groups to analyze then do them all
199                 if (globaldata->Groups.size() != 0) {
200                         if (globaldata->Groups[0] != "all") {
201                                 //check that groups are valid
202                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
203                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
204                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
205                                                 // erase the invalid group from globaldata->Groups
206                                                 globaldata->Groups.erase(globaldata->Groups.begin()+i);
207                                         }
208                                 }
209                         
210                                 //if the user only entered invalid groups
211                                 if (globaldata->Groups.size() == 0) { 
212                                         cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
213                                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
214                                                 if (tmap->namesOfGroups[i] != "xxx") {
215                                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
216                                                         numGroups++;
217                                                         allGroups += tmap->namesOfGroups[i] + "-";
218                                                 }
219                                         }
220                                         allGroups = allGroups.substr(0, allGroups.length()-1);
221                                 }else {
222                                         for (int i = 0; i < globaldata->Groups.size(); i++) {
223                                                 allGroups += globaldata->Groups[i] + "-";
224                                                 numGroups++;
225                                         }
226                                         allGroups = allGroups.substr(0, allGroups.length()-1);
227                                 }
228                         }else{//user has enter "all" and wants the default groups
229                                 globaldata->Groups.clear();
230                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
231                                         if (tmap->namesOfGroups[i] != "xxx") {
232                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
233                                                 numGroups++;
234                                                 allGroups += tmap->namesOfGroups[i] + "-";
235                                         }
236                                 }
237                                 allGroups = allGroups.substr(0, allGroups.length()-1);
238                                 globaldata->setGroups("");
239                         }
240                 }else {
241                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
242                                 if (tmap->namesOfGroups[i] != "xxx") {
243                                         allGroups += tmap->namesOfGroups[i] + "-";
244                                 }
245                         }
246                         allGroups = allGroups.substr(0, allGroups.length()-1);
247                         numGroups = 1;
248                 }
249                 
250                 //calculate number of comparsions
251                 numComp = 0;
252                 for (int r=0; r<numGroups; r++) { 
253                         for (int l = r+1; l < numGroups; l++) {
254                                 groupComb.push_back(globaldata->Groups[r]+ "-" + globaldata->Groups[l]);
255                                 numComp++;
256                         }
257                 }
258                 
259                 //ABC
260                 if (numComp != 1) {
261                         groupComb.push_back(allGroups);
262                         numComp++;
263                 }
264         }
265         catch(exception& e) {
266                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
267                 exit(1);
268         }
269         catch(...) {
270                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271                 exit(1);
272         }               
273
274 }
275 /*****************************************************************/
276
277 void UnifracUnweightedCommand::initFile(string label){
278         try {
279                 if(counter != 0){
280                         openOutputFile(unweightedFileout, out);
281                         openInputFile(unweightedFile, inFile);
282
283                         string inputBuffer;
284                         getline(inFile, inputBuffer);
285                 
286                         out     <<  inputBuffer << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;           
287                 }else{
288                         openOutputFile(unweightedFileout, out);
289                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
290                 }
291
292                 out.setf(ios::fixed, ios::floatfield);
293                 out.setf(ios::showpoint);
294         }
295         catch(exception& e) {
296                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297                 exit(1);
298         }
299         catch(...) {
300                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
301                 exit(1);
302         }
303 }
304
305 /***********************************************************************/
306
307 void UnifracUnweightedCommand::output(vector<double> data){
308         try {
309                 if(counter != 0){               
310                         string inputBuffer;
311                         getline(inFile, inputBuffer);
312 //                      out     <<  inputBuffer << setprecision(6) << '\t' << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl;
313                 
314                         out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
315                 }
316                 else{
317                         out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
318                 }
319
320         }
321         catch(exception& e) {
322                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
323                 exit(1);
324         }
325         catch(...) {
326                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
327                 exit(1);
328         }
329 };
330
331 /***********************************************************************/
332
333 void UnifracUnweightedCommand::resetFile(){
334         try {
335                 if(counter != 0){
336                         out.close();
337                         inFile.close();
338                 }
339                 else{
340                         out.close();
341                 }
342                 counter = 1;
343                 
344                 remove(unweightedFile.c_str());
345                 rename(unweightedFileout.c_str(), unweightedFile.c_str());
346         }
347         catch(exception& e) {
348                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349                 exit(1);
350         }
351         catch(...) {
352                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
353                 exit(1);
354         }       
355 }
356