]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
started shared utilities, updates to venn and heatmap added tree.groups command
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 /***********************************************************/
13 UnifracWeightedCommand::UnifracWeightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".wsummary";
20                 openOutputFile(sumFile, outSum);
21                                 
22                 util = new SharedUtil();
23                 string s; //to make work with setgroups
24                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, s, numGroups, "weighted");     //sets the groups the user wants to analyze
25                 util->getCombos(groupComb, globaldata->Groups, numComp);
26                 globaldata->setGroups("");
27                                 
28                 convert(globaldata->getIters(), iters);  //how many random trees to generate
29                 weighted = new Weighted(tmap);
30
31         }
32         catch(exception& e) {
33                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
34                 exit(1);
35         }
36         catch(...) {
37                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
38                 exit(1);
39         }
40 }
41 /***********************************************************/
42 int UnifracWeightedCommand::execute() {
43         try {
44                 Progress* reading;
45                 reading = new Progress("Comparing to random:", iters);
46                 
47                 //get weighted for users tree
48                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
49                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
50                                 
51                 //create new tree with same num nodes and leaves as users
52                 randT = new Tree();
53                 
54                 //get weighted scores for users trees
55                 for (int i = 0; i < T.size(); i++) {
56                         counter = 0;
57                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
58                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
59                         //weightedFile = globaldata->getTreeFile()  + toString(i+1) + ".weighted";
60                         //weightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".weighted";
61                         output = new ThreeColumnFile2(globaldata->getTreeFile()  + toString(i+1) + ".weighted");
62
63                         userData = weighted->getValues(T[i]);  //userData[0] = weightedscore
64                         
65                         //save users score
66                         for (int s=0; s<numComp; s++) {
67                                 //add users score to vector of user scores
68                                 uScores[s].push_back(userData[s]);
69                                 
70                                 //save users tree score for summary file
71                                 utreeScores.push_back(userData[s]);
72                         }
73                         
74                         //get scores for random trees
75                         for (int j = 0; j < iters; j++) {
76                                 int count = 0;
77                                 for (int r=0; r<numGroups; r++) { 
78                                         for (int l = r+1; l < numGroups; l++) {
79                                                 //copy T[i]'s info.
80                                                 randT->getCopy(T[i]);
81                                                  
82                                                 //create a random tree with same topology as T[i], but different labels
83                                                 randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
84                                                 //get wscore of random tree
85                                                 randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
86                                                 
87                                                 //save scores
88                                                 rScores[count].push_back(randomData[0]);
89                                                 count++;
90                                         }
91                                 }
92                                 
93                                 //update progress bar
94                                 reading->update(j);
95
96                         }
97
98                         //removeValidScoresDuplicates(); 
99                         //find the signifigance of the score for summary file
100                         for (int f = 0; f < numComp; f++) {
101                                 //sort random scores
102                                 sort(rScores[f].begin(), rScores[f].end());
103                                 
104                                 //the index of the score higher than yours is returned 
105                                 //so if you have 1000 random trees the index returned is 100 
106                                 //then there are 900 trees with a score greater then you. 
107                                 //giving you a signifigance of 0.900
108                                 int index = findIndex(userData[f], f);    if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
109                         
110                                 //the signifigance is the number of trees with the users score or higher 
111                                 WScoreSig.push_back((iters-index)/(float)iters);
112                         }
113                         
114                         //out << "Tree# " << i << endl;
115                         calculateFreqsCumuls();
116                         printWeightedFile();
117                         
118                         delete output;
119                         
120                         //clear data
121                         rScores.clear();
122                         uScores.clear();
123                         validScores.clear();
124                 }
125                 
126                 //finish progress bar
127                 reading->finish();
128                 delete reading;
129                 
130                 printWSummaryFile();
131                 
132                 //clear out users groups
133                 globaldata->Groups.clear();
134                 
135                 delete randT;
136                 
137                 return 0;
138                 
139         }
140         catch(exception& e) {
141                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
142                 exit(1);
143         }
144         catch(...) {
145                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
146                 exit(1);
147         }
148 }
149 /***********************************************************/
150 void UnifracWeightedCommand::printWeightedFile() {
151         try {
152                 vector<double> data;
153                 vector<string> tags;
154                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
155                 
156                 for(int a = 0; a < numComp; a++) {
157                         output->initFile(groupComb[a], tags);
158                         //print each line
159                         for (it = validScores.begin(); it != validScores.end(); it++) { 
160                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
161                                 output->output(data);
162                                 data.clear();
163                         } 
164                         output->resetFile();
165                 }
166                 
167                 //out.close();
168                 //inFile.close();
169                 //remove(weightedFileout.c_str());
170                 
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }
180 }
181
182
183 /***********************************************************/
184 void UnifracWeightedCommand::printWSummaryFile() {
185         try {
186                 //column headers
187                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
188                 cout << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
189                 
190                 //format output
191                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
192                 
193                 //print each line
194                 int count = 0;
195                 for (int i = 0; i < T.size(); i++) { 
196                         for (int j = 0; j < numComp; j++) {
197                                 if (WScoreSig[count] > (1/(float)iters)) {
198                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; 
199                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; 
200                                 }else{
201                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
202                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
203                                 }
204                                 count++;
205                         }
206                 }
207                 outSum.close();
208         }
209         catch(exception& e) {
210                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
211                 exit(1);
212         }
213         catch(...) {
214                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
215                 exit(1);
216         }
217 }
218
219 /***********************************************************/
220 int UnifracWeightedCommand::findIndex(float score, int index) {
221         try{
222                 for (int i = 0; i < rScores[index].size(); i++) {
223                         if (rScores[index][i] >= score) {       return i;       }
224                 }
225                 return rScores[index].size();
226         }
227         catch(exception& e) {
228                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
229                 exit(1);
230         }
231         catch(...) {
232                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
233                 exit(1);
234         }
235 }
236
237 /***********************************************************/
238
239 void UnifracWeightedCommand::calculateFreqsCumuls() {
240         try {
241                 //clear out old tree values
242                 rScoreFreq.clear();
243                 rScoreFreq.resize(numComp);
244                 rCumul.clear();
245                 rCumul.resize(numComp);
246                 validScores.clear();
247         
248                 //calculate frequency
249                 for (int f = 0; f < numComp; f++) {
250                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
251                                 validScores[rScores[f][i]] = rScores[f][i];
252                                 it = rScoreFreq[f].find(rScores[f][i]);
253                                 if (it != rScoreFreq[f].end()) {
254                                         rScoreFreq[f][rScores[f][i]]++;
255                                 }else{
256                                         rScoreFreq[f][rScores[f][i]] = 1;
257                                 }
258                         }
259                 }
260                 
261                 //calculate rcumul
262                 for(int a = 0; a < numComp; a++) {
263                         float rcumul = 1.0000;
264                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
265                         for (it = validScores.begin(); it != validScores.end(); it++) {
266                                 //make rscoreFreq map and rCumul
267                                 it2 = rScoreFreq[a].find(it->first);
268                                 rCumul[a][it->first] = rcumul;
269                                 //get percentage of random trees with that info
270                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
271                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
272                         }
273                 }
274
275         }
276         catch(exception& e) {
277                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
278                 exit(1);
279         }
280         catch(...) {
281                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
282                 exit(1);
283         }
284
285 }
286
287 /*****************************************************************
288
289 void UnifracWeightedCommand::initFile(string label){
290         try {
291                 if(counter != 0){
292                         openOutputFile(weightedFileout, out);
293                         openInputFile(weightedFile, inFile);
294
295                         string inputBuffer;
296                         getline(inFile, inputBuffer);
297                 
298                         out     <<  inputBuffer << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;           
299                 }else{
300                         openOutputFile(weightedFileout, out);
301                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
302                 }
303
304                 out.setf(ios::fixed, ios::floatfield);
305                 out.setf(ios::showpoint);
306         }
307         catch(exception& e) {
308                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
309                 exit(1);
310         }
311         catch(...) {
312                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
313                 exit(1);
314         }
315 }
316
317 /***********************************************************************
318
319 void UnifracWeightedCommand::output(vector<double> data){
320         try {
321                 if(counter != 0){               
322                         string inputBuffer;
323                         getline(inFile, inputBuffer);
324
325                         out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
326                 }
327                 else{
328                         out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
329
330                 }
331                 
332         }
333         catch(exception& e) {
334                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
335                 exit(1);
336         }
337         catch(...) {
338                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
339                 exit(1);
340         }
341 };
342
343 /***********************************************************************
344
345 void UnifracWeightedCommand::resetFile(){
346         try {
347                 if(counter != 0){
348                         out.close();
349                         inFile.close();
350                 }
351                 else{
352                         out.close();
353                 }
354                 counter = 1;
355                 
356                 remove(weightedFile.c_str());
357                 rename(weightedFileout.c_str(), weightedFile.c_str());
358         }
359         catch(exception& e) {
360                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
361                 exit(1);
362         }
363         catch(...) {
364                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
365                 exit(1);
366         }
367         
368         
369                         
370 }
371
372 */
373
374
375
376