]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
fixed parsimony for groups
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
18                 randomtree = globaldata->getRandomTree();
19                 
20                 //user has entered their own tree
21                 if (randomtree == "") { 
22                         T = globaldata->gTree;
23                         tmap = globaldata->gTreemap;
24                         parsFile = globaldata->getTreeFile() + ".parsimony";
25                         openOutputFile(parsFile, out);
26                         sumFile = globaldata->getTreeFile() + ".psummary";
27                         openOutputFile(sumFile, outSum);
28                         distFile = globaldata->getTreeFile() + ".pdistrib";
29                         openOutputFile(distFile, outDist);
30                         //set users groups to analyze
31                         setGroups();
32                 }else { //user wants random distribution
33                         savetmap = globaldata->gTreemap;
34                         getUserInput();
35                         parsFile = randomtree + ".rd_parsimony";
36                         openOutputFile(parsFile, out);
37                 }
38                 
39                 convert(globaldata->getIters(), iters);  //how many random trees to generate
40                 pars = new Parsimony(tmap);
41
42         }
43         catch(exception& e) {
44                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
45                 exit(1);
46         }
47         catch(...) {
48                 cout << "An unknown error has occurred in the ParsimonyCommand class function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51 }
52 /***********************************************************/
53 int ParsimonyCommand::execute() {
54         try {
55                 
56                 //get pscore for users tree
57                 userData.resize(1,0);  //data[0] = pscore.
58                 randomData.resize(1,0);  //data[0] = pscore.
59                 
60                 //format output
61                 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
62                 outDist << "RandomTree#" << '\t' << "ParsScore" << endl;
63                 
64                 if (randomtree == "") {
65                         copyUserTree = new Tree();
66                         //get pscores for users trees
67                         for (int i = 0; i < T.size(); i++) {
68                                 //copy users tree so that you can redo pgroups 
69                                 copyUserTree->getCopy(T[i]);
70                                 cout << "Processing tree " << i+1 << endl;
71                                 userData = pars->getValues(copyUserTree);  //userData[0] = pscore
72                                 cout << "Tree " << i+1 << " parsimony score = " << userData[0] << endl;
73                                 //update uscoreFreq
74                                 it = uscoreFreq.find(userData[0]);
75                                 if (it == uscoreFreq.end()) {//new score
76                                         uscoreFreq[userData[0]] = 1;
77                                 }else{ uscoreFreq[userData[0]]++; }
78                         
79                                 //add users score to valid scores
80                                 validScores[userData[0]] = userData[0];
81                                 
82                                 //save score for summary file
83                                 userTreeScores.push_back(userData[0]);
84                         
85                         }
86                         
87                         //get pscores for random trees
88                         for (int j = 0; j < iters; j++) {
89                                 //create new tree with same num nodes and leaves as users
90                                 randT = new Tree();
91                                 //create random relationships between nodes
92                                 randT->assembleRandomTree();
93                                 //get pscore of random tree
94                                 randomData = pars->getValues(randT);
95                         
96                                 //add trees pscore to map of scores
97                                 it2 = rscoreFreq.find(randomData[0]);
98                                 if (it2 != rscoreFreq.end()) {//already have that score
99                                         rscoreFreq[randomData[0]]++;
100                                 }else{//first time we have seen this score
101                                         rscoreFreq[randomData[0]] = 1;
102                                 }
103                         
104                                 //add randoms score to validscores
105                                 validScores[randomData[0]] = randomData[0];
106                                 
107                                 //output info to pdistrib file
108                                 outDist << j+1 << '\t'<< '\t' << randomData[0] << endl;
109                                         
110                                 delete randT;
111                         }
112                 }else {
113                         //get pscores for random trees
114                         for (int j = 0; j < iters; j++) {
115                                 //create new tree with same num nodes and leaves as users
116                                 randT = new Tree();
117                                 //create random relationships between nodes
118                                 randT->assembleRandomTree();
119                                 //get pscore of random tree
120                                 randomData = pars->getValues(randT);
121                         
122                                 //add trees pscore to map of scores
123                                 it2 = rscoreFreq.find(randomData[0]);
124                                 if (it2 != rscoreFreq.end()) {//already have that score
125                                         rscoreFreq[randomData[0]]++;
126                                 }else{//first time we have seen this score
127                                         rscoreFreq[randomData[0]] = 1;
128                                 }
129                         
130                                 //add randoms score to validscores
131                                 validScores[randomData[0]] = randomData[0];
132                                         
133                                 delete randT;
134                         }
135                 }
136                 
137                 float rcumul = 0.0000;
138                 float ucumul = 0.0000;
139                 
140                 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
141                 for (it = validScores.begin(); it != validScores.end(); it++) { 
142                         if (randomtree == "") {
143                                 it2 = uscoreFreq.find(it->first);
144                                 //user data has that score 
145                                 if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second;  }
146                                 else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score
147                                 //make uCumul map
148                                 uCumul[it->first] = ucumul;
149                         }
150                         
151                         //make rscoreFreq map and rCumul
152                         it2 = rscoreFreq.find(it->first);
153                         //get percentage of random trees with that info
154                         if (it2 != rscoreFreq.end()) {  rscoreFreq[it->first] /= iters; rcumul+= it2->second;  }
155                         else { rscoreFreq[it->first] = 0.0000; } //no random trees with that score
156                         rCumul[it->first] = rcumul;
157                 }
158                 
159                 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
160                 for (int h = 0; h < userTreeScores.size(); h++) {
161                         UScoreSig.push_back(rCumul[userTreeScores[h]]);
162                 }
163
164                 printParsimonyFile();
165                 printUSummaryFile();
166                 
167                 //reset globaldata's treemap if you just did random distrib
168                 if (randomtree != "") { globaldata->gTreemap = savetmap; }
169                 
170                 //reset randomTree parameter to ""
171                 globaldata->setRandomTree("");
172                 //reset groups parameter
173                 globaldata->Groups.clear();
174                 
175                 return 0;
176                 
177         }
178         catch(exception& e) {
179                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
180                 exit(1);
181         }
182         catch(...) {
183                 cout << "An unknown error has occurred in the ParsimonyCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
184                 exit(1);
185         }
186 }
187
188 /***********************************************************/
189 void ParsimonyCommand::printParsimonyFile() {
190         try {
191                 //column headers
192                 if (randomtree == "") {
193                         out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
194                 }else {
195                         out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
196                 }
197                 
198                 //format output
199                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
200                 
201                 //print each line
202                 for (it = validScores.begin(); it != validScores.end(); it++) { 
203                         if (randomtree == "") {
204                                 out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; 
205                         }else{
206                                 out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;      
207                         }
208                 } 
209                 
210                 out.close();
211                 
212         }
213         catch(exception& e) {
214                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
215                 exit(1);
216         }
217         catch(...) {
218                 cout << "An unknown error has occurred in the ParsimonyCommand class function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
219                 exit(1);
220         }
221 }
222 /***********************************************************/
223 void ParsimonyCommand::printUSummaryFile() {
224         try {
225                 //column headers
226                 outSum << "Tree#" << '\t'  <<  "ParsScore" << '\t' << '\t' << "ParsSig" <<  endl;
227                 
228                 //format output
229                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
230                 
231                 //print each line
232                 for (int i = 0; i< T.size(); i++) {
233                         outSum << setprecision(6) << i+1 << '\t' << '\t' << userTreeScores[i] << '\t' << UScoreSig[i] << endl; 
234                 }
235                 
236                 outSum.close();
237         }
238         catch(exception& e) {
239                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
240                 exit(1);
241         }
242         catch(...) {
243                 cout << "An unknown error has occurred in the ParsimonyCommand class function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
244                 exit(1);
245         }
246 }
247
248 /***********************************************************/
249 void ParsimonyCommand::getUserInput() {
250         try {
251         
252                 //create treemap
253                 tmap = new TreeMap();
254
255                 cout << "Please enter the number of groups you would like to analyze: ";
256                 cin >> numGroups;
257                         
258                 int num, count;
259                 count = 1;
260                 numEachGroup.resize(numGroups, 0);  
261                 
262                 for (int i = 1; i <= numGroups; i++) {
263                         cout << "Please enter the number of sequences in group " << i <<  ": ";
264                         cin >> num;
265                                 
266                         //set tmaps seqsPerGroup
267                         tmap->seqsPerGroup[toString(i)] = num;
268                         
269                         //set tmaps namesOfSeqs
270                         for (int j = 0; j < num; j++) {
271                                 tmap->namesOfSeqs.push_back(toString(count));
272                                 tmap->treemap[toString(count)].groupname = toString(i);
273                                 count++;
274                         }
275                 }
276                 
277                 //clears buffer so next command doesn't have error
278                 string s;       
279                 getline(cin, s);
280                 
281                 //save tmap for later
282                 globaldata->gTreemap = tmap;
283                 
284         }
285         catch(exception& e) {
286                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287                 exit(1);
288         }
289         catch(...) {
290                 cout << "An unknown error has occurred in the ParsimonyCommand class function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
291                 exit(1);
292         }
293 }
294 /***********************************************************/
295
296 void ParsimonyCommand::setGroups() {
297         try {
298                 //if the user has not entered specific groups to analyze then do them all
299                 if (globaldata->Groups.size() != 0) {
300                         //check that groups are valid
301                         for (int i = 0; i < globaldata->Groups.size(); i++) {
302                                 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
303                                         cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
304                                         // erase the invalid group from globaldata->Groups
305                                         globaldata->Groups.erase (globaldata->Groups.begin()+i);
306                                 }
307                         }
308                         
309                         //if the user only entered invalid groups
310                         if (globaldata->Groups.size() == 0) { 
311                                 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
312                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
313                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
314                                 }
315                         }
316                                         
317                 }else {
318                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
319                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
320                         }
321                 }
322         }
323         catch(exception& e) {
324                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
325                 exit(1);
326         }
327         catch(...) {
328                 cout << "An unknown error has occurred in the ParsimonyCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
329                 exit(1);
330         }               
331
332 }
333 /*****************************************************************/
334
335