]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
c2584eabfe07975d103c17c6d93e96e2ae1770e2
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 /***********************************************************/
13 UnifracWeightedCommand::UnifracWeightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 sumFile = globaldata->getTreeFile() + ".wsummary";
20                 openOutputFile(sumFile, outSum);
21                                 
22                 setGroups();    //sets the groups the user wants to analyze                     
23                 convert(globaldata->getIters(), iters);  //how many random trees to generate
24                 weighted = new Weighted(tmap);
25
26         }
27         catch(exception& e) {
28                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
29                 exit(1);
30         }
31         catch(...) {
32                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35 }
36 /***********************************************************/
37 int UnifracWeightedCommand::execute() {
38         try {
39                 Progress* reading;
40                 reading = new Progress("Comparing to random:", iters);
41                 
42                 //get weighted for users tree
43                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
44                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
45                                 
46                 //create new tree with same num nodes and leaves as users
47                 randT = new Tree();
48                 
49                 //get weighted scores for users trees
50                 for (int i = 0; i < T.size(); i++) {
51                         counter = 0;
52                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
53                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
54                         weightedFile = globaldata->getTreeFile()  + toString(i+1) + ".weighted";
55                         weightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".weighted";
56
57                         userData = weighted->getValues(T[i]);  //userData[0] = weightedscore
58                         
59                         //save users score
60                         for (int s=0; s<numComp; s++) {
61                                 //add users score to vector of user scores
62                                 uScores[s].push_back(userData[s]);
63                                 
64                                 //save users tree score for summary file
65                                 utreeScores.push_back(userData[s]);
66                         }
67                         
68                         //get scores for random trees
69                         for (int j = 0; j < iters; j++) {
70                                 int count = 0;
71                                 for (int r=0; r<numGroups; r++) { 
72                                         for (int l = r+1; l < numGroups; l++) {
73                                                 //copy T[i]'s info.
74                                                 randT->getCopy(T[i]);
75                                                  
76                                                 //create a random tree with same topology as T[i], but different labels
77                                                 randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
78                                                 //get wscore of random tree
79                                                 randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
80                                                 
81                                                 //save scores
82                                                 rScores[count].push_back(randomData[0]);
83                                                 count++;
84                                         }
85                                 }
86                                 
87                                 //update progress bar
88                                 reading->update(j);
89
90                         }
91
92                         //removeValidScoresDuplicates(); 
93                         //find the signifigance of the score for summary file
94                         for (int f = 0; f < numComp; f++) {
95                                 //sort random scores
96                                 sort(rScores[f].begin(), rScores[f].end());
97                                 
98                                 //the index of the score higher than yours is returned 
99                                 //so if you have 1000 random trees the index returned is 100 
100                                 //then there are 900 trees with a score greater then you. 
101                                 //giving you a signifigance of 0.900
102                                 int index = findIndex(userData[f], f);    if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
103                         
104                                 //the signifigance is the number of trees with the users score or higher 
105                                 WScoreSig.push_back((iters-index)/(float)iters);
106                         }
107                         
108                         //out << "Tree# " << i << endl;
109                         calculateFreqsCumuls();
110                         printWeightedFile();
111                         
112                         //clear data
113                         rScores.clear();
114                         uScores.clear();
115                         validScores.clear();
116                 }
117                 
118                 //finish progress bar
119                 reading->finish();
120                 delete reading;
121                 
122                 printWSummaryFile();
123                 
124                 //clear out users groups
125                 globaldata->Groups.clear();
126                 
127                 delete randT;
128                 
129                 return 0;
130                 
131         }
132         catch(exception& e) {
133                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
134                 exit(1);
135         }
136         catch(...) {
137                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }
140 }
141 /***********************************************************/
142 void UnifracWeightedCommand::printWeightedFile() {
143         try {
144                 vector<double> data;
145                 
146                 for(int a = 0; a < numComp; a++) {
147                         initFile(groupComb[a]);
148                         //print each line
149                         for (it = validScores.begin(); it != validScores.end(); it++) { 
150                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
151                                 output(data);
152                                 data.clear();
153                         } 
154                         resetFile();
155                 }
156                 
157                 out.close();
158                 inFile.close();
159                 remove(weightedFileout.c_str());
160                 
161         }
162         catch(exception& e) {
163                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164                 exit(1);
165         }
166         catch(...) {
167                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168                 exit(1);
169         }
170 }
171
172
173 /***********************************************************/
174 void UnifracWeightedCommand::printWSummaryFile() {
175         try {
176                 //column headers
177                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
178                 cout << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
179                 
180                 //format output
181                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
182                 
183                 //print each line
184                 int count = 0;
185                 for (int i = 0; i < T.size(); i++) { 
186                         for (int j = 0; j < numComp; j++) {
187                                 if (WScoreSig[count] > (1/(float)iters)) {
188                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; 
189                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << WScoreSig[count] << endl; 
190                                 }else{
191                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
192                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; 
193                                 }
194                                 count++;
195                         }
196                 }
197                 outSum.close();
198         }
199         catch(exception& e) {
200                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
201                 exit(1);
202         }
203         catch(...) {
204                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205                 exit(1);
206         }
207 }
208
209 /***********************************************************/
210 int UnifracWeightedCommand::findIndex(float score, int index) {
211         try{
212                 for (int i = 0; i < rScores[index].size(); i++) {
213                         if (rScores[index][i] >= score) {       return i;       }
214                 }
215                 return rScores[index].size();
216         }
217         catch(exception& e) {
218                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
219                 exit(1);
220         }
221         catch(...) {
222                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
223                 exit(1);
224         }
225 }
226
227 /***********************************************************/
228 void UnifracWeightedCommand::setGroups() {
229         try {
230                 numGroups = 0;
231                 //if the user has not entered specific groups to analyze then do them all
232                 if (globaldata->Groups.size() == 0) {
233                         for (int i=0; i < tmap->getNumGroups(); i++) { 
234                                 if (tmap->namesOfGroups[i] != "xxx") {
235                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
236                                         numGroups++;
237                                 }
238                         }
239                 }else {
240                         if (globaldata->getGroups() != "all") {
241                                 //check that groups are valid
242                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
243                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
244                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
245                                                 // erase the invalid group from globaldata->Groups
246                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
247                                         }
248                                 }
249                         
250                                 //if the user only entered invalid groups
251                                 if (globaldata->Groups.size() == 0) { 
252                                         for (int i=0; i < tmap->getNumGroups(); i++) { 
253                                                 if (tmap->namesOfGroups[i] != "xxx") {
254                                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
255                                                         numGroups++;
256                                                 }
257                                         }
258                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
259                                 }else if (globaldata->Groups.size() == 1) { 
260                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
261                                         globaldata->Groups.clear();
262                                         for (int i=0; i < tmap->getNumGroups(); i++) { 
263                                                 if (tmap->namesOfGroups[i] != "xxx") {
264                                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
265                                                         numGroups++;
266                                                 }
267                                         }
268                                 }else { numGroups = globaldata->Groups.size(); }
269                         }else { //users wants all groups
270                                 globaldata->Groups.clear();
271                                 globaldata->setGroups("");
272                                 for (int i=0; i < tmap->getNumGroups(); i++) { 
273                                         if (tmap->namesOfGroups[i] != "xxx") {
274                                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
275                                                 numGroups++;
276                                         }
277                                 }
278                         }
279                 }
280                 
281                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
282                 numComp = 0;
283                 for (int i=0; i<numGroups; i++) { 
284                         numComp += i; 
285                         for (int l = i+1; l < numGroups; l++) {
286                                 //set group comparison labels
287                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
288                         }
289                 }
290         }
291         catch(exception& e) {
292                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293                 exit(1);
294         }
295         catch(...) {
296                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297                 exit(1);
298         }
299 }
300
301 /***********************************************************/
302
303 void UnifracWeightedCommand::calculateFreqsCumuls() {
304         try {
305                 //clear out old tree values
306                 rScoreFreq.clear();
307                 rScoreFreq.resize(numComp);
308                 rCumul.clear();
309                 rCumul.resize(numComp);
310                 validScores.clear();
311         
312                 //calculate frequency
313                 for (int f = 0; f < numComp; f++) {
314                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
315                                 validScores[rScores[f][i]] = rScores[f][i];
316                                 it = rScoreFreq[f].find(rScores[f][i]);
317                                 if (it != rScoreFreq[f].end()) {
318                                         rScoreFreq[f][rScores[f][i]]++;
319                                 }else{
320                                         rScoreFreq[f][rScores[f][i]] = 1;
321                                 }
322                         }
323                 }
324                 
325                 //calculate rcumul
326                 for(int a = 0; a < numComp; a++) {
327                         float rcumul = 1.0000;
328                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
329                         for (it = validScores.begin(); it != validScores.end(); it++) {
330                                 //make rscoreFreq map and rCumul
331                                 it2 = rScoreFreq[a].find(it->first);
332                                 rCumul[a][it->first] = rcumul;
333                                 //get percentage of random trees with that info
334                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
335                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
336                         }
337                 }
338
339         }
340         catch(exception& e) {
341                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
342                 exit(1);
343         }
344         catch(...) {
345                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
346                 exit(1);
347         }
348
349 }
350
351 /*****************************************************************/
352
353 void UnifracWeightedCommand::initFile(string label){
354         try {
355                 if(counter != 0){
356                         openOutputFile(weightedFileout, out);
357                         openInputFile(weightedFile, inFile);
358
359                         string inputBuffer;
360                         getline(inFile, inputBuffer);
361                 
362                         out     <<  inputBuffer << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;           
363                 }else{
364                         openOutputFile(weightedFileout, out);
365                         out     << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
366                 }
367
368                 out.setf(ios::fixed, ios::floatfield);
369                 out.setf(ios::showpoint);
370         }
371         catch(exception& e) {
372                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
373                 exit(1);
374         }
375         catch(...) {
376                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
377                 exit(1);
378         }
379 }
380
381 /***********************************************************************/
382
383 void UnifracWeightedCommand::output(vector<double> data){
384         try {
385                 if(counter != 0){               
386                         string inputBuffer;
387                         getline(inFile, inputBuffer);
388
389                         out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
390                 }
391                 else{
392                         out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length())  << '\t' << data[1] << '\t' << data[2] << endl;
393
394                 }
395                 
396         }
397         catch(exception& e) {
398                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
399                 exit(1);
400         }
401         catch(...) {
402                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
403                 exit(1);
404         }
405 };
406
407 /***********************************************************************/
408
409 void UnifracWeightedCommand::resetFile(){
410         try {
411                 if(counter != 0){
412                         out.close();
413                         inFile.close();
414                 }
415                 else{
416                         out.close();
417                 }
418                 counter = 1;
419                 
420                 remove(weightedFile.c_str());
421                 rename(weightedFileout.c_str(), weightedFile.c_str());
422         }
423         catch(exception& e) {
424                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
425                 exit(1);
426         }
427         catch(...) {
428                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
429                 exit(1);
430         }       
431 }
432