]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
unifrac.unweighted command changes
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".uwsummary";
20                 openOutputFile(sumFile, outSum);
21
22                 setGroups(); //sets users groups to analyze
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 unweighted = new Unweighted(tmap);
25
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35 }
36 /***********************************************************/
37 int UnifracUnweightedCommand::execute() {
38         try {
39         
40                 userData.resize(numComp,0);  //data[0] = unweightedscore 
41                 randomData.resize(numComp,0); //data[0] = unweightedscore
42                 //create new tree with same num nodes and leaves as users
43                                 
44                 //get pscores for users trees
45                 for (int i = 0; i < T.size(); i++) {
46                         unweightedFile = globaldata->getTreeFile()  + toString(i+1) + ".unweighted";
47                         unweightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".unweighted";
48                                                 //column headers
49                         outSum << "Tree# " << i+1 << endl;
50                         outSum << "Comb" << '\t'  <<  "UWScore" << '\t' << '\t' << "UWSig" <<  endl;
51
52                         //get unweighted for users tree
53                         rscoreFreq.resize(numComp);  
54                         rCumul.resize(numComp);  
55                         validScores.resize(numComp); 
56                         utreeScores.resize(numComp);  
57                         UWScoreSig.resize(numComp); 
58
59                         cout << "Processing tree " << i+1 << endl;
60                         userData = unweighted->getValues(T[i]);  //userData[0] = unweightedscore
61                         
62                         //output scores for each combination
63                         for(int k = 0; k < numComp; k++) {
64                                 //add users score to valid scores
65                                 validScores[k][userData[k]] = userData[k];
66                         
67                                 //saves users score
68                                 utreeScores[k].push_back(userData[k]);
69                         }
70                         
71                         //get unweighted scores for random trees
72                         for (int j = 0; j < iters; j++) {
73                                 //we need a different getValues because when we swap the labels we only want to swap those in each parwise comparison
74                                 randomData = unweighted->getValues(T[i], "", "");
75                                 
76                                 for(int k = 0; k < numComp; k++) {      
77 //cout << "iter " << j << " comp " << k << " = " << randomData[k] << endl;
78                                         //add trees unweighted score to map of scores
79                                         it2 = rscoreFreq[k].find(randomData[k]);
80                                         if (it2 != rscoreFreq[k].end()) {//already have that score
81                                                 rscoreFreq[k][randomData[k]]++;
82                                         }else{//first time we have seen this score
83                                                 rscoreFreq[k][randomData[k]] = 1;
84                                         }
85                                 
86                                         //add randoms score to validscores
87                                         validScores[k][randomData[k]] = randomData[k];
88                                 }
89                         }
90                 
91                 for(int a = 0; a < numComp; a++) {
92                         float rcumul = 1.0000;
93                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
94                         for (it = validScores[a].begin(); it != validScores[a].end(); it++) { 
95                                 //make rscoreFreq map and rCumul
96                                 it2 = rscoreFreq[a].find(it->first);
97                                 rCumul[a][it->first] = rcumul;
98                                 //get percentage of random trees with that info
99                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
100                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
101                         }
102                         UWScoreSig[a].push_back(rCumul[a][userData[a]]);
103                 }
104                 
105                 printUnweightedFile();
106                 printUWSummaryFile();
107                 
108                 rscoreFreq.clear(); 
109                 rCumul.clear();  
110                 validScores.clear(); 
111                 utreeScores.clear();  
112                 UWScoreSig.clear(); 
113         }
114                 //reset groups parameter
115                 globaldata->Groups.clear(); globaldata->setGroups("");
116                 outSum.close();
117                 
118                 return 0;
119                 
120         }
121         catch(exception& e) {
122                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
123                 exit(1);
124         }
125         catch(...) {
126                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
127                 exit(1);
128         }
129 }
130 /***********************************************************/
131 void UnifracUnweightedCommand::printUnweightedFile() {
132         try {
133                 vector<double> data;
134                 
135                 for(int a = 0; a < numComp; a++) {
136                         initFile(groupComb[a]);
137                         //print each line
138                         for (it = validScores[a].begin(); it != validScores[a].end(); it++) { 
139                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
140                                 output(data);
141                                 data.clear();
142                         } 
143                         resetFile();
144                 }
145                 
146                 out.close();
147                 inFile.close();
148                 remove(unweightedFileout.c_str());
149                 
150         }
151         catch(exception& e) {
152                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
153                 exit(1);
154         }
155         catch(...) {
156                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
157                 exit(1);
158         }
159 }
160
161 /***********************************************************/
162 void UnifracUnweightedCommand::printUWSummaryFile() {
163         try {
164                                 
165                 //format output
166                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
167                 
168                 //print each line
169                 for(int a = 0; a < numComp; a++) {
170                         outSum << setprecision(6) << groupComb[a] << '\t' << '\t' << utreeScores[a][0] << '\t' << UWScoreSig[a][0] << endl;
171                         cout << setprecision(6)  << groupComb[a] << '\t' << '\t' << utreeScores[a][0] << '\t' << UWScoreSig[a][0] << endl; 
172                 }
173                 
174         }
175         catch(exception& e) {
176                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
177                 exit(1);
178         }
179         catch(...) {
180                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
181                 exit(1);
182         }
183 }
184 /***********************************************************/
185
186 void UnifracUnweightedCommand::setGroups() {
187         try {
188                 string allGroups = "";
189                 numGroups = 0;
190                 //if the user has not entered specific groups to analyze then do them all
191                 if (globaldata->Groups.size() != 0) {
192                         if (globaldata->Groups[0] != "all") {
193                                 //check that groups are valid
194                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
195                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
196                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
197                                                 // erase the invalid group from globaldata->Groups
198                                                 globaldata->Groups.erase(globaldata->Groups.begin()+i);
199                                         }
200                                 }
201                         
202                                 //if the user only entered invalid groups
203                                 if (globaldata->Groups.size() == 0) { 
204                                         cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
205                                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
206                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
207                                                 numGroups++;
208                                                 allGroups += tmap->namesOfGroups[i];
209                                         }
210                                 }else {
211                                         for (int i = 0; i < globaldata->Groups.size(); i++) {
212                                                 allGroups += globaldata->Groups[i];
213                                                 numGroups++;
214                                         }
215                                 }
216                         }else{//user has enter "all" and wants the default groups
217                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
218                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
219                                         numGroups++;
220                                         allGroups += tmap->namesOfGroups[i];
221                                 }
222                                 globaldata->setGroups("");
223                         }
224                 }else {
225                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
226                                 allGroups += tmap->namesOfGroups[i];
227                         }
228                         numGroups = 1;
229                 }
230                 
231                 //calculate number of comparsions
232                 numComp = 0;
233                 for (int r=0; r<numGroups; r++) { 
234                         for (int l = r+1; l < numGroups; l++) {
235                                 groupComb.push_back(globaldata->Groups[r]+globaldata->Groups[l]);
236                                 numComp++;
237                         }
238                 }
239                 
240                 //ABC
241                 if (numComp != 1) {
242                         groupComb.push_back(allGroups);
243                         numComp++;
244                 }
245         }
246         catch(exception& e) {
247                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248                 exit(1);
249         }
250         catch(...) {
251                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
252                 exit(1);
253         }               
254
255 }
256 /*****************************************************************/
257
258 void UnifracUnweightedCommand::initFile(string label){
259         try {
260                 if(counter != 0){
261                         openOutputFile(unweightedFileout, out);
262                         openInputFile(unweightedFile, inFile);
263
264                         string inputBuffer;
265                         getline(inFile, inputBuffer);
266                 
267                         out     <<  inputBuffer << '\t' << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;                
268                 }else{
269                         openOutputFile(unweightedFileout, out);
270                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
271                 }
272
273                 out.setf(ios::fixed, ios::floatfield);
274                 out.setf(ios::showpoint);
275         }
276         catch(exception& e) {
277                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
278                 exit(1);
279         }
280         catch(...) {
281                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
282                 exit(1);
283         }
284 }
285
286 /***********************************************************************/
287
288 void UnifracUnweightedCommand::output(vector<double> data){
289         try {
290                 if(counter != 0){               
291                         string inputBuffer;
292                         getline(inFile, inputBuffer);
293                 
294                         out     <<  inputBuffer << setprecision(6) << '\t' << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
295                 }
296                 else{
297                         out << setprecision(6) << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
298                 }
299
300         }
301         catch(exception& e) {
302                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
303                 exit(1);
304         }
305         catch(...) {
306                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
307                 exit(1);
308         }
309 };
310
311 /***********************************************************************/
312
313 void UnifracUnweightedCommand::resetFile(){
314         try {
315                 if(counter != 0){
316                         out.close();
317                         inFile.close();
318                 }
319                 else{
320                         out.close();
321                 }
322                 counter = 1;
323                 
324                 remove(unweightedFile.c_str());
325                 rename(unweightedFileout.c_str(), unweightedFile.c_str());
326         }
327         catch(exception& e) {
328                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
329                 exit(1);
330         }
331         catch(...) {
332                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
333                 exit(1);
334         }       
335 }
336