]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
weightedcommand
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 /***********************************************************/
13 UnifracWeightedCommand::UnifracWeightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 weightedFile = globaldata->getTreeFile() + ".weighted";
20                 openOutputFile(weightedFile, out);
21                 //column headers
22                 out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
23
24                 sumFile = globaldata->getTreeFile() + ".wsummary";
25                 openOutputFile(sumFile, outSum);
26                                 
27                 setGroups();    //sets the groups the user wants to analyze                     
28                 convert(globaldata->getIters(), iters);  //how many random trees to generate
29                 weighted = new Weighted(tmap);
30
31         }
32         catch(exception& e) {
33                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
34                 exit(1);
35         }
36         catch(...) {
37                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
38                 exit(1);
39         }
40 }
41 /***********************************************************/
42 int UnifracWeightedCommand::execute() {
43         try {
44                 
45                 //get weighted for users tree
46                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
47                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
48                                 
49                 //create new tree with same num nodes and leaves as users
50                 randT = new Tree();
51                 
52                 //get pscores for users trees
53                 for (int i = 0; i < T.size(); i++) {
54                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
55                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
56                         validScores.resize(numComp); 
57                                                         
58                         cout << "Processing tree " << i+1 << endl;
59                         userData = weighted->getValues(T[i]);  //userData[0] = weightedscore
60                         
61                         //save users score
62                         for (int s=0; s<numComp; s++) {
63                                 //add users score to vector of user scores
64                                 uScores[s].push_back(userData[s]);
65                                 
66                                 //add users score to vector of valid scores
67                                 validScores[s].push_back(userData[s]);
68
69                                 //save users tree score for summary file
70                                 utreeScores.push_back(userData[s]);
71                         }
72                         
73                         //get scores for random trees
74                         for (int j = 0; j < iters; j++) {
75                                 int n = 1;
76                                 int count = 0;
77                                 for (int r=0; r<numGroups; r++) { 
78                                         for (int l = n; l < numGroups; l++) {
79                                                 //copy T[i]'s info.
80                                                 randT->getCopy(T[i]);
81                                                  
82                                                 if (globaldata->Groups.size() != 0) {
83                                                         //create a random tree with same topology as T[i], but different labels
84                                                         randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
85                                                         //get wscore of random tree
86                                                         randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
87                                                 }else {
88                                                         //create a random tree with same topology as T[i], but different labels
89                                                         randT->assembleRandomUnifracTree(tmap->namesOfGroups[r], tmap->namesOfGroups[l]);
90                                                         //get wscore of random tree
91                                                         randomData = weighted->getValues(randT, tmap->namesOfGroups[r], tmap->namesOfGroups[l]);
92                                                 }
93
94                                                 //save scores
95                                                 rScores[count].push_back(randomData[0]);
96                                                 validScores[count][randomData[0]] = randomData[0];
97                                                 count++;
98                                         }
99                                         n++;
100                                 }
101                         }
102
103                         removeValidScoresDuplicates(); 
104                         
105                         //find the signifigance of the score for summary file
106                         for (int f = 0; f < numComp; f++) {
107                                 //sort random scores
108                                 sort(rScores[f].begin(), rScores[f].end());
109                                 
110                                 //the index of the score higher than yours is returned 
111                                 //so if you have 1000 random trees the index returned is 100 
112                                 //then there are 900 trees with a score greater then you. 
113                                 //giving you a signifigance of 0.900
114                                 int index = findIndex(userData[f]);    if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
115                         
116                                 //the signifigance is the number of trees with the users score or higher 
117                                 WScoreSig.push_back((iters-index)/(float)iters);
118                         }
119                         
120                         out << "Tree# " << i << endl;
121                         //printWeightedFile();
122                         
123                         //clear data
124                         rScores.clear();
125                         uScores.clear();
126                         validScores.clear();
127                 }
128                 
129                 printWSummaryFile();
130                 
131                 //clear out users groups
132                 globaldata->Groups.clear();
133                 
134                 delete randT;
135                 
136                 return 0;
137                 
138         }
139         catch(exception& e) {
140                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
141                 exit(1);
142         }
143         catch(...) {
144                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
145                 exit(1);
146         }
147 }
148 /***********************************************************
149 void UnifracWeightedCommand::printWeightedFile() {
150         try {
151                                                 
152                 //format output
153                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
154                 
155                 //for each group
156                 for (int e = 0; e < numComp; e++) {
157                         //print each line in that group
158                         for (i = 0; i < validScores[e].size(); i++) { 
159                                 out << setprecision(6) <<  groupComb[e] << '\t' << validScores[e][i] << '\t' << '\t' << uscoreFreq[e][it->first] << '\t' << uCumul[e][it->first] << '\t' << rscoreFreq[e][it->first] << '\t' << rCumul[e][it->first] << endl; 
160                         } 
161                 }
162                 
163                 out.close();
164                 
165         }
166         catch(exception& e) {
167                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168                 exit(1);
169         }
170         catch(...) {
171                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
172                 exit(1);
173         }
174 }
175
176
177 /***********************************************************/
178 void UnifracWeightedCommand::printWSummaryFile() {
179         try {
180                 //column headers
181                 outSum << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" <<  endl;
182                 cout << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" <<  endl;
183                 
184                 //format output
185                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
186                 
187                 //print each line
188                 int count = 0;
189                 for (int i = 0; i < T.size(); i++) { 
190                         for (int j = 0; j < numComp; j++) {
191                                 outSum << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl; 
192                                 cout << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl; 
193                                 count++;
194                         }
195                 }
196                 outSum.close();
197         }
198         catch(exception& e) {
199                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
200                 exit(1);
201         }
202         catch(...) {
203                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
204                 exit(1);
205         }
206 }
207
208 /***********************************************************/
209 void UnifracWeightedCommand::removeValidScoresDuplicates() {
210         try {
211                 for (int e = 0; e < numComp; e++) {
212                         //sort valid scores
213                         sort(validScores[e].begin(), validScores[e].end());
214                         
215                         for (int i = 0; i< validScores[e].size()-1; i++) { 
216                                 if (validScores[e][i] == validScores[e][i+1]) { validScores[e].erase(validScores[e].begin()+i); }
217                         }
218                 }
219         }
220         catch(exception& e) {
221                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
222                 exit(1);
223         }
224         catch(...) {
225                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
226                 exit(1);
227         }
228 }
229
230 /***********************************************************/
231 int UnifracWeightedCommand::findIndex(float score) {
232         try{
233                 for (int e = 0; e < numComp; e++) {
234                         for (int i = 0; i < rScores[e].size(); i++) {
235 //cout << rScores[e][i] << " number " << i << endl;
236                                 if (rScores[e][i] >= score) { return i; }
237                         }
238                 }
239                 return -1;
240         }
241         catch(exception& e) {
242                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
243                 exit(1);
244         }
245         catch(...) {
246                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
247                 exit(1);
248         }
249 }
250
251 /***********************************************************/
252 void UnifracWeightedCommand::setGroups() {
253         try {
254                 //if the user has not entered specific groups to analyze then do them all
255                 if (globaldata->Groups.size() == 0) {
256                         numGroups = tmap->getNumGroups();
257                 }else {
258                         if (globaldata->getGroups() != "all") {
259                                 //check that groups are valid
260                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
261                                         if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
262                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
263                                                 // erase the invalid group from globaldata->Groups
264                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
265                                         }
266                                 }
267                         
268                                 //if the user only entered invalid groups
269                                 if (globaldata->Groups.size() == 0) { 
270                                         numGroups = tmap->getNumGroups();
271                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
272                                 }else if (globaldata->Groups.size() == 1) { 
273                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
274                                         numGroups = tmap->getNumGroups();
275                                         globaldata->Groups.clear();
276                                 }else { numGroups = globaldata->Groups.size(); }
277                         }else { //users wants all groups
278                                 numGroups = tmap->getNumGroups();
279                                 globaldata->Groups.clear();
280                                 globaldata->setGroups("");
281                         }
282                 }
283                 
284                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
285                 numComp = 0;
286                 int n = 1;
287                 for (int i=1; i<numGroups; i++) { 
288                         numComp += i; 
289                         for (int l = n; l < numGroups; l++) {
290                                 //set group comparison labels
291                                 if (globaldata->Groups.size() != 0) {
292                                         groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]);
293                                 }else {
294                                         groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
295                                 }
296                         }
297                         n++;
298                 }
299         }
300         catch(exception& e) {
301                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
302                 exit(1);
303         }
304         catch(...) {
305                 cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
306                 exit(1);
307         }
308 }
309