]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
56305e2260d4af19050dfc0832add36c1208d13e
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
18                 randomtree = globaldata->getRandomTree();
19                 
20                 //user has entered their own tree
21                 if (randomtree == "") { 
22                         T = globaldata->gTree;
23                         tmap = globaldata->gTreemap;
24                         output = new ColumnFile(globaldata->getTreeFile()  +  ".parsimony");
25                         sumFile = globaldata->getTreeFile() + ".psummary";
26                         openOutputFile(sumFile, outSum);
27                 }else { //user wants random distribution
28                         savetmap = globaldata->gTreemap;
29                         getUserInput();
30                         output = new ColumnFile(randomtree);
31                 }
32                 
33                 //set users groups to analyze
34                 util = new SharedUtil();
35                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted");   //sets the groups the user wants to analyze
36                 util->getCombos(groupComb, globaldata->Groups, numComp);
37                 globaldata->setGroups("");
38                 
39                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
40                 
41                 convert(globaldata->getIters(), iters);  //how many random trees to generate
42                 pars = new Parsimony(tmap);
43                 counter = 0;
44
45         }
46         catch(exception& e) {
47                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }
50         catch(...) {
51                 cout << "An unknown error has occurred in the ParsimonyCommand class function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52                 exit(1);
53         }
54 }
55 /***********************************************************/
56 int ParsimonyCommand::execute() {
57         try {
58                 Progress* reading;
59                 reading = new Progress("Comparing to random:", iters);
60                 
61                 //get pscore for users tree
62                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
63                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
64                 rscoreFreq.resize(numComp);  
65                 uscoreFreq.resize(numComp);  
66                 rCumul.resize(numComp);  
67                 uCumul.resize(numComp);  
68                 userTreeScores.resize(numComp);  
69                 UScoreSig.resize(numComp); 
70                                 
71                 if (randomtree == "") {
72                         //get pscores for users trees
73                         for (int i = 0; i < T.size(); i++) {
74                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
75
76                                 //output scores for each combination
77                                 for(int k = 0; k < numComp; k++) {
78
79                                         //update uscoreFreq
80                                         it = uscoreFreq[k].find(userData[k]);
81                                         if (it == uscoreFreq[k].end()) {//new score
82                                                 uscoreFreq[k][userData[k]] = 1;
83                                         }else{ uscoreFreq[k][userData[k]]++; }
84                                         
85                                         //add users score to valid scores
86                                         validScores[userData[k]] = userData[k];
87                                         
88                                         //save score for summary file
89                                         userTreeScores[k].push_back(userData[k]);
90                                 }
91                         }
92                         
93                         //get pscores for random trees
94                         for (int j = 0; j < iters; j++) {
95                                 //create new tree with same num nodes and leaves as users
96                                 randT = new Tree();
97
98                                 //create random relationships between nodes
99                                 randT->assembleRandomTree();
100
101                                 //get pscore of random tree
102                                 randomData = pars->getValues(randT);
103                                         
104                                 for(int r = 0; r < numComp; r++) {
105                                         //add trees pscore to map of scores
106                                         it2 = rscoreFreq[r].find(randomData[r]);
107                                         if (it2 != rscoreFreq[r].end()) {//already have that score
108                                                 rscoreFreq[r][randomData[r]]++;
109                                         }else{//first time we have seen this score
110                                                 rscoreFreq[r][randomData[r]] = 1;
111                                         }
112                         
113                                         //add randoms score to validscores
114                                         validScores[randomData[r]] = randomData[r];
115                                 }
116                                 
117                                 //update progress bar
118                                 reading->update(j);
119                                 
120                                 delete randT;
121                         }
122
123                 }else {
124                         //get pscores for random trees
125                         for (int j = 0; j < iters; j++) {
126                                 //create new tree with same num nodes and leaves as users
127                                 randT = new Tree();
128                                 //create random relationships between nodes
129
130                                 randT->assembleRandomTree();
131
132                                 //get pscore of random tree
133                                 randomData = pars->getValues(randT);
134                         
135                                 for(int r = 0; r < numComp; r++) {
136                                         //add trees pscore to map of scores
137                                         it2 = rscoreFreq[r].find(randomData[r]);
138                                         if (it2 != rscoreFreq[r].end()) {//already have that score
139                                                 rscoreFreq[r][randomData[r]]++;
140                                         }else{//first time we have seen this score
141                                                 rscoreFreq[r][randomData[r]] = 1;
142                                         }
143                         
144                                         //add randoms score to validscores
145                                         validScores[randomData[r]] = randomData[r];
146                                 }
147                                 
148                                 //update progress bar
149                                 reading->update(j);
150                                 
151                                 delete randT;
152                         }
153                 }
154
155                 for(int a = 0; a < numComp; a++) {
156                         float rcumul = 0.0000;
157                         float ucumul = 0.0000;
158                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
159                         for (it = validScores.begin(); it != validScores.end(); it++) { 
160                                 if (randomtree == "") {
161                                         it2 = uscoreFreq[a].find(it->first);
162                                         //user data has that score 
163                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
164                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
165                                         //make uCumul map
166                                         uCumul[a][it->first] = ucumul;
167                                 }
168                         
169                                 //make rscoreFreq map and rCumul
170                                 it2 = rscoreFreq[a].find(it->first);
171                                 //get percentage of random trees with that info
172                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
173                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
174                                 rCumul[a][it->first] = rcumul;
175                         }
176                         
177                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
178                         for (int h = 0; h < userTreeScores[a].size(); h++) {
179                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
180                         }
181                 }
182                 
183                 //finish progress bar
184                 reading->finish();
185                 delete reading;
186
187                 
188                 printParsimonyFile();
189                 if (randomtree == "") { printUSummaryFile(); }
190                 
191                 //reset globaldata's treemap if you just did random distrib
192                 if (randomtree != "") {
193                         //memory leak prevention
194                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
195                         globaldata->gTreemap = savetmap;
196                 }
197                 
198                 //reset randomTree parameter to ""
199                 globaldata->setRandomTree("");
200                 //reset groups parameter
201                 globaldata->Groups.clear(); 
202                 
203                 return 0;
204                 
205         }
206         catch(exception& e) {
207                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
208                 exit(1);
209         }
210         catch(...) {
211                 cout << "An unknown error has occurred in the ParsimonyCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
212                 exit(1);
213         }
214 }
215
216 /***********************************************************/
217 void ParsimonyCommand::printParsimonyFile() {
218         try {
219                 vector<double> data;
220                 vector<string> tags;
221                 
222                 if (randomtree == "") {
223                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
224                 }else {
225                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
226                 }
227
228                 for(int a = 0; a < numComp; a++) {
229                         output->initFile(groupComb[a], tags);
230                         //print each line
231                         for (it = validScores.begin(); it != validScores.end(); it++) { 
232                                 if (randomtree == "") {
233                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
234                                 }else{
235                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
236                                 }
237                                 output->output(data);
238                                 data.clear();
239                         } 
240                         output->resetFile();
241                 }
242         }
243         catch(exception& e) {
244                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
245                 exit(1);
246         }
247         catch(...) {
248                 cout << "An unknown error has occurred in the ParsimonyCommand class function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }
251 }
252 /***********************************************************/
253 void ParsimonyCommand::printUSummaryFile() {
254         try {
255                 //column headers
256                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
257                 cout << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
258                 
259                 //format output
260                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
261                 
262                 
263                 //print each line
264                 for (int i = 0; i< T.size(); i++) {
265                         for(int a = 0; a < numComp; a++) {
266                                 if (UScoreSig[a][i] > (1/(float)iters)) {
267                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl;
268                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl;
269                                 }else {
270                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length())  << '\t' << "<" << (1/float(iters)) << endl;
271                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << "<" << (1/float(iters)) << endl;
272                                 }
273                         }
274                 }
275                 
276                 outSum.close();
277         }
278         catch(exception& e) {
279                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
280                 exit(1);
281         }
282         catch(...) {
283                 cout << "An unknown error has occurred in the ParsimonyCommand class function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
284                 exit(1);
285         }
286 }
287
288 /***********************************************************/
289 void ParsimonyCommand::getUserInput() {
290         try {
291         
292                 //create treemap
293                 tmap = new TreeMap();
294
295                 cout << "Please enter the number of groups you would like to analyze: ";
296                 cin >> numGroups;
297                         
298                 int num, count;
299                 count = 1;
300                 numEachGroup.resize(numGroups, 0);  
301                 
302                 for (int i = 1; i <= numGroups; i++) {
303                         cout << "Please enter the number of sequences in group " << i <<  ": ";
304                         cin >> num;
305                                 
306                         //set tmaps seqsPerGroup
307                         tmap->seqsPerGroup[toString(i)] = num;
308                         tmap->namesOfGroups.push_back(toString(i));
309                         
310                         //set tmaps namesOfSeqs
311                         for (int j = 0; j < num; j++) {
312                                 tmap->namesOfSeqs.push_back(toString(count));
313                                 tmap->treemap[toString(count)].groupname = toString(i);
314                                 count++;
315                         }
316                 }
317                 
318                 //clears buffer so next command doesn't have error
319                 string s;       
320                 getline(cin, s);
321                 
322                 //save tmap for later
323                 //memory leak prevention
324                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
325                 globaldata->gTreemap = tmap;
326                 
327         }
328         catch(exception& e) {
329                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
330                 exit(1);
331         }
332         catch(...) {
333                 cout << "An unknown error has occurred in the ParsimonyCommand class function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
334                 exit(1);
335         }
336 }
337
338 /***********************************************************/
339
340