]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
working on readtree
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".uwsummary";
20                 openOutputFile(sumFile, outSum);
21
22                 setGroups(); //sets users groups to analyze
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 unweighted = new Unweighted(tmap);
25
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35 }
36 /***********************************************************/
37 int UnifracUnweightedCommand::execute() {
38         try {
39         
40                 userData.resize(numComp,0);  //data[0] = unweightedscore 
41                 randomData.resize(numComp,0); //data[0] = unweightedscore
42                 //create new tree with same num nodes and leaves as users
43                                 
44                 //get pscores for users trees
45                 for (int i = 0; i < T.size(); i++) {
46                         counter = 0;
47                         unweightedFile = globaldata->getTreeFile()  + toString(i+1) + ".unweighted";
48                         unweightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".unweighted";
49                                                 //column headers
50                         outSum << "Tree# " << i+1 << endl;
51                         outSum << "Comb" << '\t'  <<  "UWScore" << '\t' << '\t' << "UWSig" <<  endl;
52
53                         //get unweighted for users tree
54                         rscoreFreq.resize(numComp);  
55                         rCumul.resize(numComp);  
56                         utreeScores.resize(numComp);  
57                         UWScoreSig.resize(numComp); 
58
59                         cout << "Processing tree " << i+1 << endl;
60                         userData = unweighted->getValues(T[i]);  //userData[0] = unweightedscore
61                         
62                         //output scores for each combination
63                         for(int k = 0; k < numComp; k++) {
64                                 //saves users score
65                                 utreeScores[k].push_back(userData[k]);
66                         }
67                         
68                         //get unweighted scores for random trees
69                         for (int j = 0; j < iters; j++) {
70                                 //we need a different getValues because when we swap the labels we only want to swap those in each parwise comparison
71                                 randomData = unweighted->getValues(T[i], "", "");
72                                 
73                                 for(int k = 0; k < numComp; k++) {      
74 //cout << "iter " << j << " comp " << k << " = " << randomData[k] << endl;
75                                         //add trees unweighted score to map of scores
76                                         it2 = rscoreFreq[k].find(randomData[k]);
77                                         if (it2 != rscoreFreq[k].end()) {//already have that score
78                                                 rscoreFreq[k][randomData[k]]++;
79                                         }else{//first time we have seen this score
80                                                 rscoreFreq[k][randomData[k]] = 1;
81                                         }
82                                 
83                                         //add randoms score to validscores
84                                         validScores[randomData[k]] = randomData[k];
85                                 }
86                         }
87                 
88                 for(int a = 0; a < numComp; a++) {
89                         float rcumul = 1.0000;
90                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
91                         for (it = validScores.begin(); it != validScores.end(); it++) { 
92                                 //make rscoreFreq map and rCumul
93                                 it2 = rscoreFreq[a].find(it->first);
94                                 rCumul[a][it->first] = rcumul;
95                                 //get percentage of random trees with that info
96                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
97                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
98                         }
99                         UWScoreSig[a].push_back(rCumul[a][userData[a]]);
100                 }
101                 
102                 printUnweightedFile();
103                 printUWSummaryFile();
104                 
105                 rscoreFreq.clear(); 
106                 rCumul.clear();  
107                 validScores.clear(); 
108                 utreeScores.clear();  
109                 UWScoreSig.clear(); 
110         }
111                 //reset groups parameter
112                 globaldata->Groups.clear(); globaldata->setGroups("");
113                 outSum.close();
114                 
115                 return 0;
116                 
117         }
118         catch(exception& e) {
119                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
120                 exit(1);
121         }
122         catch(...) {
123                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
124                 exit(1);
125         }
126 }
127 /***********************************************************/
128 void UnifracUnweightedCommand::printUnweightedFile() {
129         try {
130                 vector<double> data;
131                 
132                 for(int a = 0; a < numComp; a++) {
133                         initFile(groupComb[a]);
134                         //print each line
135                         for (it = validScores.begin(); it != validScores.end(); it++) { 
136                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
137                                 output(data);
138                                 data.clear();
139                         } 
140                         resetFile();
141                 }
142                 
143                 out.close();
144                 inFile.close();
145                 remove(unweightedFileout.c_str());
146                 
147         }
148         catch(exception& e) {
149                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
150                 exit(1);
151         }
152         catch(...) {
153                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
154                 exit(1);
155         }
156 }
157
158 /***********************************************************/
159 void UnifracUnweightedCommand::printUWSummaryFile() {
160         try {
161                                 
162                 //format output
163                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
164                 
165                 //print each line
166                 for(int a = 0; a < numComp; a++) {
167                         outSum << setprecision(6) << groupComb[a] << '\t' << '\t' << utreeScores[a][0] << '\t' << UWScoreSig[a][0] << endl;
168                         cout << setprecision(6)  << groupComb[a] << '\t' << '\t' << utreeScores[a][0] << '\t' << UWScoreSig[a][0] << endl; 
169                 }
170                 
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }
180 }
181 /***********************************************************/
182
183 void UnifracUnweightedCommand::setGroups() {
184         try {
185                 string allGroups = "";
186                 numGroups = 0;
187                 //if the user has not entered specific groups to analyze then do them all
188                 if (globaldata->Groups.size() != 0) {
189                         if (globaldata->Groups[0] != "all") {
190                                 //check that groups are valid
191                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
192                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
193                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
194                                                 // erase the invalid group from globaldata->Groups
195                                                 globaldata->Groups.erase(globaldata->Groups.begin()+i);
196                                         }
197                                 }
198                         
199                                 //if the user only entered invalid groups
200                                 if (globaldata->Groups.size() == 0) { 
201                                         cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
202                                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
203                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
204                                                 numGroups++;
205                                                 allGroups += tmap->namesOfGroups[i];
206                                         }
207                                 }else {
208                                         for (int i = 0; i < globaldata->Groups.size(); i++) {
209                                                 allGroups += globaldata->Groups[i];
210                                                 numGroups++;
211                                         }
212                                 }
213                         }else{//user has enter "all" and wants the default groups
214                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
215                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
216                                         numGroups++;
217                                         allGroups += tmap->namesOfGroups[i];
218                                 }
219                                 globaldata->setGroups("");
220                         }
221                 }else {
222                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
223                                 allGroups += tmap->namesOfGroups[i];
224                         }
225                         numGroups = 1;
226                 }
227                 
228                 //calculate number of comparsions
229                 numComp = 0;
230                 for (int r=0; r<numGroups; r++) { 
231                         for (int l = r+1; l < numGroups; l++) {
232                                 groupComb.push_back(globaldata->Groups[r]+globaldata->Groups[l]);
233                                 numComp++;
234                         }
235                 }
236                 
237                 //ABC
238                 if (numComp != 1) {
239                         groupComb.push_back(allGroups);
240                         numComp++;
241                 }
242         }
243         catch(exception& e) {
244                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
245                 exit(1);
246         }
247         catch(...) {
248                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }               
251
252 }
253 /*****************************************************************/
254
255 void UnifracUnweightedCommand::initFile(string label){
256         try {
257                 if(counter != 0){
258                         openOutputFile(unweightedFileout, out);
259                         openInputFile(unweightedFile, inFile);
260
261                         string inputBuffer;
262                         getline(inFile, inputBuffer);
263                 
264                         out     <<  inputBuffer << '\t' << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;                
265                 }else{
266                         openOutputFile(unweightedFileout, out);
267                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
268                 }
269
270                 out.setf(ios::fixed, ios::floatfield);
271                 out.setf(ios::showpoint);
272         }
273         catch(exception& e) {
274                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
275                 exit(1);
276         }
277         catch(...) {
278                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279                 exit(1);
280         }
281 }
282
283 /***********************************************************************/
284
285 void UnifracUnweightedCommand::output(vector<double> data){
286         try {
287                 if(counter != 0){               
288                         string inputBuffer;
289                         getline(inFile, inputBuffer);
290                 
291                         out     <<  inputBuffer << setprecision(6) << '\t' << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
292                 }
293                 else{
294                         out << setprecision(6) << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
295                 }
296
297         }
298         catch(exception& e) {
299                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
300                 exit(1);
301         }
302         catch(...) {
303                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
304                 exit(1);
305         }
306 };
307
308 /***********************************************************************/
309
310 void UnifracUnweightedCommand::resetFile(){
311         try {
312                 if(counter != 0){
313                         out.close();
314                         inFile.close();
315                 }
316                 else{
317                         out.close();
318                 }
319                 counter = 1;
320                 
321                 remove(unweightedFile.c_str());
322                 rename(unweightedFileout.c_str(), unweightedFile.c_str());
323         }
324         catch(exception& e) {
325                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
326                 exit(1);
327         }
328         catch(...) {
329                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
330                 exit(1);
331         }       
332 }
333