]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
6fd986fc6022d6642179d58ed3b497c4f2166439
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".uwsummary";
20                 openOutputFile(sumFile, outSum);
21
22                 setGroups(); //sets users groups to analyze
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 unweighted = new Unweighted(tmap);
25
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35 }
36 /***********************************************************/
37 int UnifracUnweightedCommand::execute() {
38         try {
39         
40                 userData.resize(numComp,0);  //data[0] = unweightedscore 
41                 randomData.resize(numComp,0); //data[0] = unweightedscore
42                 //create new tree with same num nodes and leaves as users
43                                 
44                 //get pscores for users trees
45                 for (int i = 0; i < T.size(); i++) {
46                         counter = 0;
47                         unweightedFile = globaldata->getTreeFile()  + toString(i+1) + ".unweighted";
48                         unweightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".unweighted";
49                         
50                         //column headers
51 //                      outSum << "Tree# " << i+1 << endl;
52                         outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t' << "UWSig" <<  endl;
53 //                      cout << "Tree# " << i+1 << endl;
54                         cout << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" << '\t' << "UWSig" <<  endl;
55
56
57                         //get unweighted for users tree
58                         rscoreFreq.resize(numComp);  
59                         rCumul.resize(numComp);  
60                         utreeScores.resize(numComp);  
61                         UWScoreSig.resize(numComp); 
62
63                         userData = unweighted->getValues(T[i]);  //userData[0] = unweightedscore
64                         
65                         //output scores for each combination
66                         for(int k = 0; k < numComp; k++) {
67                                 //saves users score
68                                 utreeScores[k].push_back(userData[k]);
69                         }
70                         
71                         //get unweighted scores for random trees
72                         for (int j = 0; j < iters; j++) {
73                                 //we need a different getValues because when we swap the labels we only want to swap those in each parwise comparison
74                                 randomData = unweighted->getValues(T[i], "", "");
75                                 
76                                 for(int k = 0; k < numComp; k++) {      
77                                         //add trees unweighted score to map of scores
78                                         it2 = rscoreFreq[k].find(randomData[k]);
79                                         if (it2 != rscoreFreq[k].end()) {//already have that score
80                                                 rscoreFreq[k][randomData[k]]++;
81                                         }else{//first time we have seen this score
82                                                 rscoreFreq[k][randomData[k]] = 1;
83                                         }
84                                 
85                                         //add randoms score to validscores
86                                         validScores[randomData[k]] = randomData[k];
87                                 }
88                         }
89                 
90                 for(int a = 0; a < numComp; a++) {
91                         float rcumul = 1.0000;
92                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
93                         for (it = validScores.begin(); it != validScores.end(); it++) { 
94                                 //make rscoreFreq map and rCumul
95                                 it2 = rscoreFreq[a].find(it->first);
96                                 rCumul[a][it->first] = rcumul;
97                                 //get percentage of random trees with that info
98                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
99                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
100                         }
101                         UWScoreSig[a].push_back(rCumul[a][userData[a]]);
102                 }
103                 
104                 printUnweightedFile();
105                 printUWSummaryFile();
106                 
107                 rscoreFreq.clear(); 
108                 rCumul.clear();  
109                 validScores.clear(); 
110                 utreeScores.clear();  
111                 UWScoreSig.clear(); 
112         }
113                 //reset groups parameter
114                 globaldata->Groups.clear(); globaldata->setGroups("");
115                 outSum.close();
116                 
117                 return 0;
118                 
119         }
120         catch(exception& e) {
121                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
122                 exit(1);
123         }
124         catch(...) {
125                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
126                 exit(1);
127         }
128 }
129 /***********************************************************/
130 void UnifracUnweightedCommand::printUnweightedFile() {
131         try {
132                 vector<double> data;
133                 
134                 for(int a = 0; a < numComp; a++) {
135                         initFile(groupComb[a]);
136                         //print each line
137                         for (it = validScores.begin(); it != validScores.end(); it++) { 
138                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
139                                 output(data);
140                                 data.clear();
141                         } 
142                         resetFile();
143                 }
144                 
145                 out.close();
146                 inFile.close();
147                 remove(unweightedFileout.c_str());
148                 
149         }
150         catch(exception& e) {
151                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
152                 exit(1);
153         }
154         catch(...) {
155                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
156                 exit(1);
157         }
158 }
159
160 /***********************************************************/
161 void UnifracUnweightedCommand::printUWSummaryFile() {
162         try {
163                                 
164                 //format output
165                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
166                         
167                 //print each line
168                 for (int i = 0; i< T.size(); i++) {
169                         for(int a = 0; a < numComp; a++) {
170                                 if (UWScoreSig[a][i] > (1/(float)iters)) {
171                                         outSum << setprecision(globaldata->getIters().length()) << i+1 << '\t' << groupComb[a] << '\t' << utreeScores[a][i] << '\t' << UWScoreSig[a][i] << endl;
172                                         cout << setprecision(globaldata->getIters().length()) << i+1 << '\t' << groupComb[a] << '\t' << utreeScores[a][i] << '\t' << UWScoreSig[a][i] << endl;
173                                 }else {
174                                         outSum << setprecision(globaldata->getIters().length()) << i+1 << '\t' << groupComb[a] << '\t' << utreeScores[a][i] << '\t' << "<" << (1/float(iters)) << endl;
175                                         cout << setprecision(globaldata->getIters().length()) << i+1 << '\t' << groupComb[a] << '\t' << utreeScores[a][i] << '\t' << "<" << (1/float(iters)) << endl;
176                                 }
177                         }
178                 }
179                 
180         }
181         catch(exception& e) {
182                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183                 exit(1);
184         }
185         catch(...) {
186                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
187                 exit(1);
188         }
189 }
190 /***********************************************************/
191
192 void UnifracUnweightedCommand::setGroups() {
193         try {
194                 string allGroups = "";
195                 numGroups = 0;
196                 //if the user has not entered specific groups to analyze then do them all
197                 if (globaldata->Groups.size() != 0) {
198                         if (globaldata->Groups[0] != "all") {
199                                 //check that groups are valid
200                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
201                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
202                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
203                                                 // erase the invalid group from globaldata->Groups
204                                                 globaldata->Groups.erase(globaldata->Groups.begin()+i);
205                                         }
206                                 }
207                         
208                                 //if the user only entered invalid groups
209                                 if (globaldata->Groups.size() == 0) { 
210                                         cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
211                                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
212                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
213                                                 numGroups++;
214                                                 allGroups += tmap->namesOfGroups[i] + "-";
215                                         }
216                                         allGroups = allGroups.substr(0, allGroups.length()-1);
217                                 }else {
218                                         for (int i = 0; i < globaldata->Groups.size(); i++) {
219                                                 allGroups += globaldata->Groups[i] + "-";
220                                                 numGroups++;
221                                         }
222                                         allGroups = allGroups.substr(0, allGroups.length()-1);
223                                 }
224                         }else{//user has enter "all" and wants the default groups
225                                 globaldata->Groups.clear();
226                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
227                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
228                                         numGroups++;
229                                         allGroups += tmap->namesOfGroups[i] + "-";
230                                 }
231                                 allGroups = allGroups.substr(0, allGroups.length()-1);
232                                 globaldata->setGroups("");
233                         }
234                 }else {
235                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
236                                 allGroups += tmap->namesOfGroups[i] + "-";
237                         }
238                         allGroups = allGroups.substr(0, allGroups.length()-1);
239                         numGroups = 1;
240                 }
241                 
242                 //calculate number of comparsions
243                 numComp = 0;
244                 for (int r=0; r<numGroups; r++) { 
245                         for (int l = r+1; l < numGroups; l++) {
246                                 groupComb.push_back(globaldata->Groups[r]+ "-" + globaldata->Groups[l]);
247                                 numComp++;
248                         }
249                 }
250                 
251                 //ABC
252                 if (numComp != 1) {
253                         groupComb.push_back(allGroups);
254                         numComp++;
255                 }
256         }
257         catch(exception& e) {
258                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
259                 exit(1);
260         }
261         catch(...) {
262                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
263                 exit(1);
264         }               
265
266 }
267 /*****************************************************************/
268
269 void UnifracUnweightedCommand::initFile(string label){
270         try {
271                 if(counter != 0){
272                         openOutputFile(unweightedFileout, out);
273                         openInputFile(unweightedFile, inFile);
274
275                         string inputBuffer;
276                         getline(inFile, inputBuffer);
277                 
278                         out     <<  inputBuffer << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;           
279                 }else{
280                         openOutputFile(unweightedFileout, out);
281                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
282                 }
283
284                 out.setf(ios::fixed, ios::floatfield);
285                 out.setf(ios::showpoint);
286         }
287         catch(exception& e) {
288                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
289                 exit(1);
290         }
291         catch(...) {
292                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293                 exit(1);
294         }
295 }
296
297 /***********************************************************************/
298
299 void UnifracUnweightedCommand::output(vector<double> data){
300         try {
301                 if(counter != 0){               
302                         string inputBuffer;
303                         getline(inFile, inputBuffer);
304 //                      out     <<  inputBuffer << setprecision(6) << '\t' << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl;
305                 
306                         out     <<  inputBuffer << setprecision(globaldata->getIters().length()-1) << '\t' << data[1] << '\t' << data[2] << endl;
307                 }
308                 else{
309                         out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()-1) << '\t' << data[1] << '\t' << data[2] << endl;
310                 }
311
312         }
313         catch(exception& e) {
314                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
315                 exit(1);
316         }
317         catch(...) {
318                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
319                 exit(1);
320         }
321 };
322
323 /***********************************************************************/
324
325 void UnifracUnweightedCommand::resetFile(){
326         try {
327                 if(counter != 0){
328                         out.close();
329                         inFile.close();
330                 }
331                 else{
332                         out.close();
333                 }
334                 counter = 1;
335                 
336                 remove(unweightedFile.c_str());
337                 rename(unweightedFileout.c_str(), unweightedFile.c_str());
338         }
339         catch(exception& e) {
340                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
341                 exit(1);
342         }
343         catch(...) {
344                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }       
347 }
348