]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
added bootstrap.shared command and fixed some bugs with heatmap
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
18                 randomtree = globaldata->getRandomTree();
19                 
20                 //user has entered their own tree
21                 if (randomtree == "") { 
22                         T = globaldata->gTree;
23                         tmap = globaldata->gTreemap;
24                         output = new ColumnFile(globaldata->getTreeFile()  +  ".parsimony");
25                         sumFile = globaldata->getTreeFile() + ".psummary";
26                         openOutputFile(sumFile, outSum);
27                 }else { //user wants random distribution
28                         savetmap = globaldata->gTreemap;
29                         getUserInput();
30                         output = new ColumnFile(randomtree);
31                 }
32                 
33                 //set users groups to analyze
34                 util = new SharedUtil();
35                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted");   //sets the groups the user wants to analyze
36                 util->getCombos(groupComb, globaldata->Groups, numComp);
37                 globaldata->setGroups("");
38                 
39                 //ABC
40                 if (numComp != 1) {
41                         groupComb.push_back(allGroups);
42                         numComp++;
43                 }
44
45                 convert(globaldata->getIters(), iters);  //how many random trees to generate
46                 pars = new Parsimony(tmap);
47                 counter = 0;
48
49         }
50         catch(exception& e) {
51                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52                 exit(1);
53         }
54         catch(...) {
55                 cout << "An unknown error has occurred in the ParsimonyCommand class function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
56                 exit(1);
57         }
58 }
59 /***********************************************************/
60 int ParsimonyCommand::execute() {
61         try {
62                 Progress* reading;
63                 reading = new Progress("Comparing to random:", iters);
64                 
65                 //get pscore for users tree
66                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
67                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
68                 rscoreFreq.resize(numComp);  
69                 uscoreFreq.resize(numComp);  
70                 rCumul.resize(numComp);  
71                 uCumul.resize(numComp);  
72                 userTreeScores.resize(numComp);  
73                 UScoreSig.resize(numComp); 
74                                 
75                 if (randomtree == "") {
76                         //get pscores for users trees
77                         for (int i = 0; i < T.size(); i++) {
78                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
79
80                                 //output scores for each combination
81                                 for(int k = 0; k < numComp; k++) {
82
83                                         //update uscoreFreq
84                                         it = uscoreFreq[k].find(userData[k]);
85                                         if (it == uscoreFreq[k].end()) {//new score
86                                                 uscoreFreq[k][userData[k]] = 1;
87                                         }else{ uscoreFreq[k][userData[k]]++; }
88                                         
89                                         //add users score to valid scores
90                                         validScores[userData[k]] = userData[k];
91                                         
92                                         //save score for summary file
93                                         userTreeScores[k].push_back(userData[k]);
94                                 }
95                         }
96                         
97                         //get pscores for random trees
98                         for (int j = 0; j < iters; j++) {
99                                 //create new tree with same num nodes and leaves as users
100                                 randT = new Tree();
101
102                                 //create random relationships between nodes
103                                 randT->assembleRandomTree();
104
105                                 //get pscore of random tree
106                                 randomData = pars->getValues(randT);
107                                         
108                                 for(int r = 0; r < numComp; r++) {
109                                         //add trees pscore to map of scores
110                                         it2 = rscoreFreq[r].find(randomData[r]);
111                                         if (it2 != rscoreFreq[r].end()) {//already have that score
112                                                 rscoreFreq[r][randomData[r]]++;
113                                         }else{//first time we have seen this score
114                                                 rscoreFreq[r][randomData[r]] = 1;
115                                         }
116                         
117                                         //add randoms score to validscores
118                                         validScores[randomData[r]] = randomData[r];
119                                 }
120                                 
121                                 //update progress bar
122                                 reading->update(j);
123                                 
124                                 delete randT;
125                         }
126
127                 }else {
128                         //get pscores for random trees
129                         for (int j = 0; j < iters; j++) {
130                                 //create new tree with same num nodes and leaves as users
131                                 randT = new Tree();
132                                 //create random relationships between nodes
133
134                                 randT->assembleRandomTree();
135
136                                 //get pscore of random tree
137                                 randomData = pars->getValues(randT);
138                         
139                                 for(int r = 0; r < numComp; r++) {
140                                         //add trees pscore to map of scores
141                                         it2 = rscoreFreq[r].find(randomData[r]);
142                                         if (it2 != rscoreFreq[r].end()) {//already have that score
143                                                 rscoreFreq[r][randomData[r]]++;
144                                         }else{//first time we have seen this score
145                                                 rscoreFreq[r][randomData[r]] = 1;
146                                         }
147                         
148                                         //add randoms score to validscores
149                                         validScores[randomData[r]] = randomData[r];
150                                 }
151                                 
152                                 //update progress bar
153                                 reading->update(j);
154                                 
155                                 delete randT;
156                         }
157                 }
158
159                 for(int a = 0; a < numComp; a++) {
160                         float rcumul = 0.0000;
161                         float ucumul = 0.0000;
162                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
163                         for (it = validScores.begin(); it != validScores.end(); it++) { 
164                                 if (randomtree == "") {
165                                         it2 = uscoreFreq[a].find(it->first);
166                                         //user data has that score 
167                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
168                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
169                                         //make uCumul map
170                                         uCumul[a][it->first] = ucumul;
171                                 }
172                         
173                                 //make rscoreFreq map and rCumul
174                                 it2 = rscoreFreq[a].find(it->first);
175                                 //get percentage of random trees with that info
176                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
177                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
178                                 rCumul[a][it->first] = rcumul;
179                         }
180                         
181                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
182                         for (int h = 0; h < userTreeScores[a].size(); h++) {
183                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
184                         }
185                 }
186                 
187                 //finish progress bar
188                 reading->finish();
189                 delete reading;
190
191                 
192                 printParsimonyFile();
193                 if (randomtree == "") { printUSummaryFile(); }
194                 
195                 //reset globaldata's treemap if you just did random distrib
196                 if (randomtree != "") {
197                         //memory leak prevention
198                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
199                         globaldata->gTreemap = savetmap;
200                 }
201                 
202                 //reset randomTree parameter to ""
203                 globaldata->setRandomTree("");
204                 //reset groups parameter
205                 globaldata->Groups.clear(); 
206                 
207                 return 0;
208                 
209         }
210         catch(exception& e) {
211                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
212                 exit(1);
213         }
214         catch(...) {
215                 cout << "An unknown error has occurred in the ParsimonyCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
216                 exit(1);
217         }
218 }
219
220 /***********************************************************/
221 void ParsimonyCommand::printParsimonyFile() {
222         try {
223                 vector<double> data;
224                 vector<string> tags;
225                 
226                 if (randomtree == "") {
227                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
228                 }else {
229                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
230                 }
231
232                 for(int a = 0; a < numComp; a++) {
233                         output->initFile(groupComb[a], tags);
234                         //print each line
235                         for (it = validScores.begin(); it != validScores.end(); it++) { 
236                                 if (randomtree == "") {
237                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
238                                 }else{
239                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
240                                 }
241                                 output->output(data);
242                                 data.clear();
243                         } 
244                         output->resetFile();
245                 }
246         }
247         catch(exception& e) {
248                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }
251         catch(...) {
252                 cout << "An unknown error has occurred in the ParsimonyCommand class function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
253                 exit(1);
254         }
255 }
256 /***********************************************************/
257 void ParsimonyCommand::printUSummaryFile() {
258         try {
259                 //column headers
260                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
261                 cout << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
262                 
263                 //format output
264                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
265                 
266                 
267                 //print each line
268                 for (int i = 0; i< T.size(); i++) {
269                         for(int a = 0; a < numComp; a++) {
270                                 if (UScoreSig[a][i] > (1/(float)iters)) {
271                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl;
272                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl;
273                                 }else {
274                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length())  << '\t' << "<" << (1/float(iters)) << endl;
275                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << "<" << (1/float(iters)) << endl;
276                                 }
277                         }
278                 }
279                 
280                 outSum.close();
281         }
282         catch(exception& e) {
283                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
284                 exit(1);
285         }
286         catch(...) {
287                 cout << "An unknown error has occurred in the ParsimonyCommand class function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
288                 exit(1);
289         }
290 }
291
292 /***********************************************************/
293 void ParsimonyCommand::getUserInput() {
294         try {
295         
296                 //create treemap
297                 tmap = new TreeMap();
298
299                 cout << "Please enter the number of groups you would like to analyze: ";
300                 cin >> numGroups;
301                         
302                 int num, count;
303                 count = 1;
304                 numEachGroup.resize(numGroups, 0);  
305                 
306                 for (int i = 1; i <= numGroups; i++) {
307                         cout << "Please enter the number of sequences in group " << i <<  ": ";
308                         cin >> num;
309                                 
310                         //set tmaps seqsPerGroup
311                         tmap->seqsPerGroup[toString(i)] = num;
312                         tmap->namesOfGroups.push_back(toString(i));
313                         
314                         //set tmaps namesOfSeqs
315                         for (int j = 0; j < num; j++) {
316                                 tmap->namesOfSeqs.push_back(toString(count));
317                                 tmap->treemap[toString(count)].groupname = toString(i);
318                                 count++;
319                         }
320                 }
321                 
322                 //clears buffer so next command doesn't have error
323                 string s;       
324                 getline(cin, s);
325                 
326                 //save tmap for later
327                 //memory leak prevention
328                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
329                 globaldata->gTreemap = tmap;
330                 
331         }
332         catch(exception& e) {
333                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
334                 exit(1);
335         }
336         catch(...) {
337                 cout << "An unknown error has occurred in the ParsimonyCommand class function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
338                 exit(1);
339         }
340 }
341
342 /***********************************************************/
343
344