]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
fixed memory leak in parsimony calculator and added progress bars to parsimony and...
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 /***********************************************************/
13 UnifracWeightedCommand::UnifracWeightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".wsummary";
20                 openOutputFile(sumFile, outSum);
21                                 
22                 setGroups();    //sets the groups the user wants to analyze                     
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 weighted = new Weighted(tmap);
25
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35 }
36 /***********************************************************/
37 int UnifracWeightedCommand::execute() {
38         try {
39                 Progress* reading;
40                 reading = new Progress("Comparing to random:", iters);
41                 
42                 //get weighted for users tree
43                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
44                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
45                                 
46                 //create new tree with same num nodes and leaves as users
47                 randT = new Tree();
48                 
49                 //get weighted scores for users trees
50                 for (int i = 0; i < T.size(); i++) {
51                         counter = 0;
52                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
53                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
54                         weightedFile = globaldata->getTreeFile()  + toString(i+1) + ".weighted";
55                         weightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".weighted";
56
57                         userData = weighted->getValues(T[i]);  //userData[0] = weightedscore
58                         
59                         //save users score
60                         for (int s=0; s<numComp; s++) {
61                                 //add users score to vector of user scores
62                                 uScores[s].push_back(userData[s]);
63                                 
64                                 //save users tree score for summary file
65                                 utreeScores.push_back(userData[s]);
66                         }
67                         
68                         //get scores for random trees
69                         for (int j = 0; j < iters; j++) {
70                                 int count = 0;
71                                 for (int r=0; r<numGroups; r++) { 
72                                         for (int l = r+1; l < numGroups; l++) {
73                                                 //copy T[i]'s info.
74                                                 randT->getCopy(T[i]);
75                                                  
76                                                 //create a random tree with same topology as T[i], but different labels
77                                                 randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
78                                                 //get wscore of random tree
79                                                 randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
80                                                 
81                                                 //save scores
82                                                 rScores[count].push_back(randomData[0]);
83                                                 count++;
84                                         }
85                                 }
86                                 
87                                 //update progress bar
88                                 reading->update(j);
89
90                         }
91
92                         //removeValidScoresDuplicates(); 
93                         //find the signifigance of the score for summary file
94                         for (int f = 0; f < numComp; f++) {
95                                 //sort random scores
96                                 sort(rScores[f].begin(), rScores[f].end());
97                                 
98                                 //the index of the score higher than yours is returned 
99                                 //so if you have 1000 random trees the index returned is 100 
100                                 //then there are 900 trees with a score greater then you. 
101                                 //giving you a signifigance of 0.900
102                                 int index = findIndex(userData[f], f);    if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
103                         
104                                 //the signifigance is the number of trees with the users score or higher 
105                                 WScoreSig.push_back((iters-index)/(float)iters);
106                         }
107                         
108                         //out << "Tree# " << i << endl;
109                         calculateFreqsCumuls();
110                         printWeightedFile();
111                         
112                         //clear data
113                         rScores.clear();
114                         uScores.clear();
115                         validScores.clear();
116                 }
117                 
118                 //finish progress bar
119                 reading->finish();
120                 delete reading;
121                 
122                 printWSummaryFile();
123                 
124                 //clear out users groups
125                 globaldata->Groups.clear();
126                 
127                 delete randT;
128                 
129                 return 0;
130                 
131         }
132         catch(exception& e) {
133                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
134                 exit(1);
135         }
136         catch(...) {
137                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }
140 }
141 /***********************************************************/
142 void UnifracWeightedCommand::printWeightedFile() {
143         try {
144                 vector<double> data;
145                 
146                 for(int a = 0; a < numComp; a++) {
147                         initFile(groupComb[a]);
148                         //print each line
149                         for (it = validScores.begin(); it != validScores.end(); it++) { 
150                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
151                                 output(data);
152                                 data.clear();
153                         } 
154                         resetFile();
155                 }
156                 
157                 out.close();
158                 inFile.close();
159                 remove(weightedFileout.c_str());
160                 
161         }
162         catch(exception& e) {
163                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164                 exit(1);
165         }
166         catch(...) {
167                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168                 exit(1);
169         }
170 }
171
172
173 /***********************************************************/
174 void UnifracWeightedCommand::printWSummaryFile() {
175         try {
176                 //column headers
177                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
178                 cout << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
179                 
180                 //format output
181                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
182                 
183                 //print each line
184                 int count = 0;
185                 for (int i = 0; i < T.size(); i++) { 
186                         for (int j = 0; j < numComp; j++) {
187                                 if (WScoreSig[count] > (1/(float)iters)) {
188                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; 
189                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; 
190                                 }else{
191                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
192                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
193                                 }
194                                 count++;
195                         }
196                 }
197                 outSum.close();
198         }
199         catch(exception& e) {
200                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
201                 exit(1);
202         }
203         catch(...) {
204                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205                 exit(1);
206         }
207 }
208
209 /***********************************************************/
210 int UnifracWeightedCommand::findIndex(float score, int index) {
211         try{
212                 for (int i = 0; i < rScores[index].size(); i++) {
213                         if (rScores[index][i] >= score) {       return i;       }
214                 }
215                 return rScores[index].size();
216         }
217         catch(exception& e) {
218                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
219                 exit(1);
220         }
221         catch(...) {
222                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
223                 exit(1);
224         }
225 }
226
227 /***********************************************************/
228 void UnifracWeightedCommand::setGroups() {
229         try {
230                 //if the user has not entered specific groups to analyze then do them all
231                 if (globaldata->Groups.size() == 0) {
232                         numGroups = tmap->getNumGroups();
233                         for (int i=0; i < numGroups; i++) { 
234                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
235                         }
236                 }else {
237                         if (globaldata->getGroups() != "all") {
238                                 //check that groups are valid
239                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
240                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
241                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
242                                                 // erase the invalid group from globaldata->Groups
243                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
244                                         }
245                                 }
246                         
247                                 //if the user only entered invalid groups
248                                 if (globaldata->Groups.size() == 0) { 
249                                         numGroups = tmap->getNumGroups();
250                                         for (int i=0; i < numGroups; i++) { 
251                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
252                                         }
253                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
254                                 }else if (globaldata->Groups.size() == 1) { 
255                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
256                                         numGroups = tmap->getNumGroups();
257                                         globaldata->Groups.clear();
258                                         for (int i=0; i < numGroups; i++) { 
259                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
260                                         }
261                                 }else { numGroups = globaldata->Groups.size(); }
262                         }else { //users wants all groups
263                                 numGroups = tmap->getNumGroups();
264                                 globaldata->Groups.clear();
265                                 globaldata->setGroups("");
266                                 for (int i=0; i < numGroups; i++) { 
267                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
268                                 }
269                         }
270                 }
271                 
272                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
273                 numComp = 0;
274                 for (int i=0; i<numGroups; i++) { 
275                         numComp += i; 
276                         for (int l = i+1; l < numGroups; l++) {
277                                 //set group comparison labels
278                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
279                         }
280                 }
281         }
282         catch(exception& e) {
283                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
284                 exit(1);
285         }
286         catch(...) {
287                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
288                 exit(1);
289         }
290 }
291
292 /***********************************************************/
293
294 void UnifracWeightedCommand::calculateFreqsCumuls() {
295         try {
296                 //clear out old tree values
297                 rScoreFreq.clear();
298                 rScoreFreq.resize(numComp);
299                 rCumul.clear();
300                 rCumul.resize(numComp);
301                 validScores.clear();
302         
303                 //calculate frequency
304                 for (int f = 0; f < numComp; f++) {
305                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
306                                 validScores[rScores[f][i]] = rScores[f][i];
307                                 it = rScoreFreq[f].find(rScores[f][i]);
308                                 if (it != rScoreFreq[f].end()) {
309                                         rScoreFreq[f][rScores[f][i]]++;
310                                 }else{
311                                         rScoreFreq[f][rScores[f][i]] = 1;
312                                 }
313                         }
314                 }
315                 
316                 //calculate rcumul
317                 for(int a = 0; a < numComp; a++) {
318                         float rcumul = 1.0000;
319                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
320                         for (it = validScores.begin(); it != validScores.end(); it++) {
321                                 //make rscoreFreq map and rCumul
322                                 it2 = rScoreFreq[a].find(it->first);
323                                 rCumul[a][it->first] = rcumul;
324                                 //get percentage of random trees with that info
325                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
326                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
327                         }
328                 }
329
330         }
331         catch(exception& e) {
332                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
333                 exit(1);
334         }
335         catch(...) {
336                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
337                 exit(1);
338         }
339
340 }
341
342 /*****************************************************************/
343
344 void UnifracWeightedCommand::initFile(string label){
345         try {
346                 if(counter != 0){
347                         openOutputFile(weightedFileout, out);
348                         openInputFile(weightedFile, inFile);
349
350                         string inputBuffer;
351                         getline(inFile, inputBuffer);
352                 
353                         out     <<  inputBuffer << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;           
354                 }else{
355                         openOutputFile(weightedFileout, out);
356                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
357                 }
358
359                 out.setf(ios::fixed, ios::floatfield);
360                 out.setf(ios::showpoint);
361         }
362         catch(exception& e) {
363                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
364                 exit(1);
365         }
366         catch(...) {
367                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
368                 exit(1);
369         }
370 }
371
372 /***********************************************************************/
373
374 void UnifracWeightedCommand::output(vector<double> data){
375         try {
376                 if(counter != 0){               
377                         string inputBuffer;
378                         getline(inFile, inputBuffer);
379
380                         out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
381                 }
382                 else{
383                         out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
384
385                 }
386                 
387         }
388         catch(exception& e) {
389                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
390                 exit(1);
391         }
392         catch(...) {
393                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
394                 exit(1);
395         }
396 };
397
398 /***********************************************************************/
399
400 void UnifracWeightedCommand::resetFile(){
401         try {
402                 if(counter != 0){
403                         out.close();
404                         inFile.close();
405                 }
406                 else{
407                         out.close();
408                 }
409                 counter = 1;
410                 
411                 remove(weightedFile.c_str());
412                 rename(weightedFileout.c_str(), weightedFile.c_str());
413         }
414         catch(exception& e) {
415                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
416                 exit(1);
417         }
418         catch(...) {
419                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
420                 exit(1);
421         }       
422 }
423