]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
2a169d04c542e6c1999fddbe5e6f53612f56ee74
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".uwsummary";
20                 openOutputFile(sumFile, outSum);
21
22                 setGroups(); //sets users groups to analyze
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 unweighted = new Unweighted(tmap);
25                 counter = 0;
26
27         }
28         catch(exception& e) {
29                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
30                 exit(1);
31         }
32         catch(...) {
33                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
34                 exit(1);
35         }
36 }
37 /***********************************************************/
38 int UnifracUnweightedCommand::execute() {
39         try {
40         
41                 userData.resize(numComp,0);  //data[0] = unweightedscore 
42                 randomData.resize(numComp,0); //data[0] = unweightedscore
43                 //create new tree with same num nodes and leaves as users
44                                 
45                 //get pscores for users trees
46                 for (int i = 0; i < T.size(); i++) {
47                         unweightedFile = globaldata->getTreeFile()  + toString(i+1) + ".unweighted";
48                         unweightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".unweighted";
49                                                 //column headers
50                         outSum << "Tree# " << i+1 << endl;
51                         outSum << "Comb" << '\t'  <<  "UWScore" << '\t' << '\t' << "UWSig" <<  endl;
52
53                         //get unweighted for users tree
54                         rscoreFreq.resize(numComp);  
55                         rCumul.resize(numComp);  
56                         validScores.resize(numComp); 
57                         utreeScores.resize(numComp);  
58                         UWScoreSig.resize(numComp); 
59
60                         cout << "Processing tree " << i+1 << endl;
61                         userData = unweighted->getValues(T[i]);  //userData[0] = unweightedscore
62                         
63                         //output scores for each combination
64                         for(int k = 0; k < numComp; k++) {
65                                 //add users score to valid scores
66                                 validScores[k][userData[k]] = userData[k];
67                         
68                                 //saves users score
69                                 utreeScores[k].push_back(userData[k]);
70                         }
71                         
72                         //get unweighted scores for random trees
73                         for (int j = 0; j < iters; j++) {
74                                 //we need a different getValues because when we swap the labels we only want to swap those in each parwise comparison
75                                 randomData = unweighted->getValues(T[i], "", "");
76                                 
77                                 for(int k = 0; k < numComp; k++) {      
78 //cout << "iter " << j << " comp " << k << " = " << randomData[k] << endl;
79                                         //add trees unweighted score to map of scores
80                                         it2 = rscoreFreq[k].find(randomData[k]);
81                                         if (it2 != rscoreFreq[k].end()) {//already have that score
82                                                 rscoreFreq[k][randomData[k]]++;
83                                         }else{//first time we have seen this score
84                                                 rscoreFreq[k][randomData[k]] = 1;
85                                         }
86                                 
87                                         //add randoms score to validscores
88                                         validScores[k][randomData[k]] = randomData[k];
89                                 }
90                         }
91                 
92                 for(int a = 0; a < numComp; a++) {
93                         float rcumul = 1.0000;
94                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
95                         for (it = validScores[a].begin(); it != validScores[a].end(); it++) { 
96                                 //make rscoreFreq map and rCumul
97                                 it2 = rscoreFreq[a].find(it->first);
98                                 rCumul[a][it->first] = rcumul;
99                                 //get percentage of random trees with that info
100                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
101                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
102                         }
103                         UWScoreSig[a].push_back(rCumul[a][userData[a]]);
104                 }
105                 
106                 printUnweightedFile();
107                 printUWSummaryFile();
108                 
109                 rscoreFreq.clear(); 
110                 rCumul.clear();  
111                 validScores.clear(); 
112                 utreeScores.clear();  
113                 UWScoreSig.clear(); 
114         }
115                 //reset groups parameter
116                 globaldata->Groups.clear(); globaldata->setGroups("");
117                 outSum.close();
118                 
119                 return 0;
120                 
121         }
122         catch(exception& e) {
123                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
124                 exit(1);
125         }
126         catch(...) {
127                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
128                 exit(1);
129         }
130 }
131 /***********************************************************/
132 void UnifracUnweightedCommand::printUnweightedFile() {
133         try {
134                 vector<double> data;
135                 
136                 for(int a = 0; a < numComp; a++) {
137                         initFile(groupComb[a]);
138                         //print each line
139                         for (it = validScores[a].begin(); it != validScores[a].end(); it++) { 
140                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
141                                 output(data);
142                                 data.clear();
143                         } 
144                         resetFile();
145                 }
146                 
147                 out.close();
148                 inFile.close();
149                 remove(unweightedFileout.c_str());
150                 
151         }
152         catch(exception& e) {
153                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
154                 exit(1);
155         }
156         catch(...) {
157                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
158                 exit(1);
159         }
160 }
161
162 /***********************************************************/
163 void UnifracUnweightedCommand::printUWSummaryFile() {
164         try {
165                                 
166                 //format output
167                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
168                 
169                 //print each line
170                 for(int a = 0; a < numComp; a++) {
171                         outSum << setprecision(6) << groupComb[a] << '\t' << '\t' << utreeScores[a][0] << '\t' << UWScoreSig[a][0] << endl;
172                         cout << setprecision(6)  << groupComb[a] << '\t' << '\t' << utreeScores[a][0] << '\t' << UWScoreSig[a][0] << endl; 
173                 }
174                 
175         }
176         catch(exception& e) {
177                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }
180         catch(...) {
181                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182                 exit(1);
183         }
184 }
185 /***********************************************************/
186
187 void UnifracUnweightedCommand::setGroups() {
188         try {
189                 string allGroups = "";
190                 numGroups = 0;
191                 //if the user has not entered specific groups to analyze then do them all
192                 if (globaldata->Groups.size() != 0) {
193                         if (globaldata->Groups[0] != "all") {
194                                 //check that groups are valid
195                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
196                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
197                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
198                                                 // erase the invalid group from globaldata->Groups
199                                                 globaldata->Groups.erase(globaldata->Groups.begin()+i);
200                                         }
201                                 }
202                         
203                                 //if the user only entered invalid groups
204                                 if (globaldata->Groups.size() == 0) { 
205                                         cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
206                                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
207                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
208                                                 numGroups++;
209                                                 allGroups += tmap->namesOfGroups[i];
210                                         }
211                                 }else {
212                                         for (int i = 0; i < globaldata->Groups.size(); i++) {
213                                                 allGroups += globaldata->Groups[i];
214                                                 numGroups++;
215                                         }
216                                 }
217                         }else{//user has enter "all" and wants the default groups
218                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
219                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
220                                         numGroups++;
221                                         allGroups += tmap->namesOfGroups[i];
222                                 }
223                                 globaldata->setGroups("");
224                         }
225                 }else {
226                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
227                                 allGroups += tmap->namesOfGroups[i];
228                         }
229                         numGroups = 1;
230                 }
231                 
232                 //calculate number of comparsions
233                 numComp = 0;
234                 for (int r=0; r<numGroups; r++) { 
235                         for (int l = r+1; l < numGroups; l++) {
236                                 groupComb.push_back(globaldata->Groups[r]+globaldata->Groups[l]);
237                                 numComp++;
238                         }
239                 }
240                 
241                 //ABC
242                 if (numComp != 1) {
243                         groupComb.push_back(allGroups);
244                         numComp++;
245                 }
246         }
247         catch(exception& e) {
248                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }
251         catch(...) {
252                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
253                 exit(1);
254         }               
255
256 }
257 /*****************************************************************/
258
259 void UnifracUnweightedCommand::initFile(string label){
260         try {
261                 if(counter != 0){
262                         openOutputFile(unweightedFileout, out);
263                         openInputFile(unweightedFile, inFile);
264
265                         string inputBuffer;
266                         getline(inFile, inputBuffer);
267                 
268                         out     <<  inputBuffer << '\t' << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;                
269                 }else{
270                         openOutputFile(unweightedFileout, out);
271                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
272                 }
273
274                 out.setf(ios::fixed, ios::floatfield);
275                 out.setf(ios::showpoint);
276         }
277         catch(exception& e) {
278                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279                 exit(1);
280         }
281         catch(...) {
282                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
283                 exit(1);
284         }
285 }
286
287 /***********************************************************************/
288
289 void UnifracUnweightedCommand::output(vector<double> data){
290         try {
291                 if(counter != 0){               
292                         string inputBuffer;
293                         getline(inFile, inputBuffer);
294                 
295                         out     <<  inputBuffer << setprecision(6) << '\t' << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
296                 }
297                 else{
298                         out << setprecision(6) << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
299                 }
300
301         }
302         catch(exception& e) {
303                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
304                 exit(1);
305         }
306         catch(...) {
307                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
308                 exit(1);
309         }
310 };
311
312 /***********************************************************************/
313
314 void UnifracUnweightedCommand::resetFile(){
315         try {
316                 if(counter != 0){
317                         out.close();
318                         inFile.close();
319                 }
320                 else{
321                         out.close();
322                 }
323                 counter = 1;
324                 
325                 remove(unweightedFile.c_str());
326                 rename(unweightedFileout.c_str(), unweightedFile.c_str());
327         }
328         catch(exception& e) {
329                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
330                 exit(1);
331         }
332         catch(...) {
333                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
334                 exit(1);
335         }       
336 }
337