]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
fixed unweighted calculator
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
18                 randomtree = globaldata->getRandomTree();
19                 
20                 //user has entered their own tree
21                 if (randomtree == "") { 
22                         T = globaldata->gTree;
23                         tmap = globaldata->gTreemap;
24                         parsFile = globaldata->getTreeFile() + ".parsimony";
25                         openOutputFile(parsFile, out);
26                         sumFile = globaldata->getTreeFile() + ".psummary";
27                         openOutputFile(sumFile, outSum);
28                         distFile = globaldata->getTreeFile() + ".pdistrib";
29                         openOutputFile(distFile, outDist);
30                         //set users groups to analyze
31                         setGroups();
32                 }else { //user wants random distribution
33                         savetmap = globaldata->gTreemap;
34                         getUserInput();
35                         parsFile = randomtree + ".rd_parsimony";
36                         openOutputFile(parsFile, out);
37                 }
38                 
39                 convert(globaldata->getIters(), iters);  //how many random trees to generate
40                 pars = new Parsimony(tmap);
41
42         }
43         catch(exception& e) {
44                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
45                 exit(1);
46         }
47         catch(...) {
48                 cout << "An unknown error has occurred in the ParsimonyCommand class function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51 }
52 /***********************************************************/
53 int ParsimonyCommand::execute() {
54         try {
55                 
56                 //get pscore for users tree
57                 userData.resize(1,0);  //data[0] = pscore.
58                 randomData.resize(1,0);  //data[0] = pscore.
59                 
60                 //format output
61                 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
62                 outDist << "RandomTree#" << '\t' << "ParsScore" << endl;
63                 
64                 if (randomtree == "") {
65                         //get pscores for users trees
66                         for (int i = 0; i < T.size(); i++) {
67                                 cout << "Processing tree " << i+1 << endl;
68                                 userData = pars->getValues(T[i]);  //userData[0] = pscore
69                                 cout << "Tree " << i+1 << " parsimony score = " << userData[0] << endl;
70                                 //update uscoreFreq
71                                 it = uscoreFreq.find(userData[0]);
72                                 if (it == uscoreFreq.end()) {//new score
73                                         uscoreFreq[userData[0]] = 1;
74                                 }else{ uscoreFreq[userData[0]]++; }
75                         
76                                 //add users score to valid scores
77                                 validScores[userData[0]] = userData[0];
78                                 
79                                 //save score for summary file
80                                 userTreeScores.push_back(userData[0]);
81                         
82                         }
83                         
84                         //get pscores for random trees
85                         for (int j = 0; j < iters; j++) {
86                                 //create new tree with same num nodes and leaves as users
87                                 randT = new Tree();
88                                 //create random relationships between nodes
89                                 randT->assembleRandomTree();
90                                 //get pscore of random tree
91                                 randomData = pars->getValues(randT);
92                         
93                                 //add trees pscore to map of scores
94                                 it2 = rscoreFreq.find(randomData[0]);
95                                 if (it2 != rscoreFreq.end()) {//already have that score
96                                         rscoreFreq[randomData[0]]++;
97                                 }else{//first time we have seen this score
98                                         rscoreFreq[randomData[0]] = 1;
99                                 }
100                         
101                                 //add randoms score to validscores
102                                 validScores[randomData[0]] = randomData[0];
103                                 
104                                 //output info to pdistrib file
105                                 outDist << j+1 << '\t'<< '\t' << randomData[0] << endl;
106                                         
107                                 delete randT;
108                         }
109                 }else {
110                         //get pscores for random trees
111                         for (int j = 0; j < iters; j++) {
112                                 //create new tree with same num nodes and leaves as users
113                                 randT = new Tree();
114                                 //create random relationships between nodes
115                                 randT->assembleRandomTree();
116                                 //get pscore of random tree
117                                 randomData = pars->getValues(randT);
118                         
119                                 //add trees pscore to map of scores
120                                 it2 = rscoreFreq.find(randomData[0]);
121                                 if (it2 != rscoreFreq.end()) {//already have that score
122                                         rscoreFreq[randomData[0]]++;
123                                 }else{//first time we have seen this score
124                                         rscoreFreq[randomData[0]] = 1;
125                                 }
126                         
127                                 //add randoms score to validscores
128                                 validScores[randomData[0]] = randomData[0];
129                                         
130                                 delete randT;
131                         }
132                 }
133                 
134                 float rcumul = 0.0000;
135                 float ucumul = 0.0000;
136                 
137                 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
138                 for (it = validScores.begin(); it != validScores.end(); it++) { 
139                         if (randomtree == "") {
140                                 it2 = uscoreFreq.find(it->first);
141                                 //user data has that score 
142                                 if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second;  }
143                                 else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score
144                                 //make uCumul map
145                                 uCumul[it->first] = ucumul;
146                         }
147                         
148                         //make rscoreFreq map and rCumul
149                         it2 = rscoreFreq.find(it->first);
150                         //get percentage of random trees with that info
151                         if (it2 != rscoreFreq.end()) {  rscoreFreq[it->first] /= iters; rcumul+= it2->second;  }
152                         else { rscoreFreq[it->first] = 0.0000; } //no random trees with that score
153                         rCumul[it->first] = rcumul;
154                 }
155                 
156                 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
157                 for (int h = 0; h < userTreeScores.size(); h++) {
158                         UScoreSig.push_back(rCumul[userTreeScores[h]]);
159                 }
160
161                 printParsimonyFile();
162                 printUSummaryFile();
163                 
164                 //reset globaldata's treemap if you just did random distrib
165                 if (randomtree != "") { globaldata->gTreemap = savetmap; }
166                 
167                 //reset randomTree parameter to ""
168                 globaldata->setRandomTree("");
169                 //reset groups parameter
170                 globaldata->Groups.clear();
171                 
172                 return 0;
173                 
174         }
175         catch(exception& e) {
176                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
177                 exit(1);
178         }
179         catch(...) {
180                 cout << "An unknown error has occurred in the ParsimonyCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
181                 exit(1);
182         }
183 }
184
185 /***********************************************************/
186 void ParsimonyCommand::printParsimonyFile() {
187         try {
188                 //column headers
189                 if (randomtree == "") {
190                         out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
191                 }else {
192                         out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
193                 }
194                 
195                 //format output
196                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
197                 
198                 //print each line
199                 for (it = validScores.begin(); it != validScores.end(); it++) { 
200                         if (randomtree == "") {
201                                 out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; 
202                         }else{
203                                 out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;      
204                         }
205                 } 
206                 
207                 out.close();
208                 
209         }
210         catch(exception& e) {
211                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
212                 exit(1);
213         }
214         catch(...) {
215                 cout << "An unknown error has occurred in the ParsimonyCommand class function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
216                 exit(1);
217         }
218 }
219 /***********************************************************/
220 void ParsimonyCommand::printUSummaryFile() {
221         try {
222                 //column headers
223                 outSum << "Tree#" << '\t'  <<  "ParsScore" << '\t' << '\t' << "ParsSig" <<  endl;
224                 
225                 //format output
226                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
227                 
228                 //print each line
229                 for (int i = 0; i< T.size(); i++) {
230                         outSum << setprecision(6) << i+1 << '\t' << '\t' << userTreeScores[i] << '\t' << UScoreSig[i] << endl; 
231                 }
232                 
233                 outSum.close();
234         }
235         catch(exception& e) {
236                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237                 exit(1);
238         }
239         catch(...) {
240                 cout << "An unknown error has occurred in the ParsimonyCommand class function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
241                 exit(1);
242         }
243 }
244
245 /***********************************************************/
246 void ParsimonyCommand::getUserInput() {
247         try {
248         
249                 //create treemap
250                 tmap = new TreeMap();
251
252                 cout << "Please enter the number of groups you would like to analyze: ";
253                 cin >> numGroups;
254                         
255                 int num, count;
256                 count = 1;
257                 numEachGroup.resize(numGroups, 0);  
258                 
259                 for (int i = 1; i <= numGroups; i++) {
260                         cout << "Please enter the number of sequences in group " << i <<  ": ";
261                         cin >> num;
262                                 
263                         //set tmaps seqsPerGroup
264                         tmap->seqsPerGroup[toString(i)] = num;
265                         
266                         //set tmaps namesOfSeqs
267                         for (int j = 0; j < num; j++) {
268                                 tmap->namesOfSeqs.push_back(toString(count));
269                                 tmap->treemap[toString(count)].groupname = toString(i);
270                                 count++;
271                         }
272                 }
273                 
274                 //clears buffer so next command doesn't have error
275                 string s;       
276                 getline(cin, s);
277                 
278                 //save tmap for later
279                 globaldata->gTreemap = tmap;
280                 
281         }
282         catch(exception& e) {
283                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
284                 exit(1);
285         }
286         catch(...) {
287                 cout << "An unknown error has occurred in the ParsimonyCommand class function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
288                 exit(1);
289         }
290 }
291 /***********************************************************/
292
293 void ParsimonyCommand::setGroups() {
294         try {
295                 //if the user has not entered specific groups to analyze then do them all
296                 if (globaldata->Groups.size() != 0) {
297                         //check that groups are valid
298                         for (int i = 0; i < globaldata->Groups.size(); i++) {
299                                 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
300                                         cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
301                                         // erase the invalid group from globaldata->Groups
302                                         globaldata->Groups.erase (globaldata->Groups.begin()+i);
303                                 }
304                         }
305                         
306                         //if the user only entered invalid groups
307                         if (globaldata->Groups.size() == 0) { 
308                                 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
309                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
310                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
311                                 }
312                         }
313                                         
314                 }else {
315                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
316                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
317                         }
318                 }
319         }
320         catch(exception& e) {
321                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
322                 exit(1);
323         }
324         catch(...) {
325                 cout << "An unknown error has occurred in the ParsimonyCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
326                 exit(1);
327         }               
328
329 }
330 /*****************************************************************/
331
332