]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
e081279489deb949427f30c5208cec0f8bb50290
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 Groups.clear();
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"random","groups","iters"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         parser = new OptionParser();
28                         parser->parse(option, parameters);  delete parser;
29                         
30                         ValidParameters* validParameter = new ValidParameters();
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it4 = parameters.begin(); it4 != parameters.end(); it4++) { 
34                                 if (validParameter->isValidParameter(it4->first, myArray, it4->second) != true) {  abort = true;  }
35                         }
36                         
37                         randomtree = validParameter->validFile(parameters, "random", false);            if (randomtree == "not found") { randomtree = ""; }
38                         
39                         //are you trying to use parsimony without reading a tree or saying you want random distribution
40                         if (randomtree == "")  {
41                                 if (globaldata->gTree.size() == 0) {
42                                         cout << "You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command." << endl; abort = true;  }
43                         }
44                                                 
45                         //check for optional parameter and set defaults
46                         // ...at some point should added some additional type checking...
47                         groups = validParameter->validFile(parameters, "groups", false);                        
48                         if (groups == "not found") { groups = ""; }
49                         else { 
50                                 splitAtDash(groups, Groups);
51                                 globaldata->Groups = Groups;
52                         }
53                                 
54                         itersString = validParameter->validFile(parameters, "iters", false);                    if (itersString == "not found") { itersString = "1000"; }
55                         convert(itersString, iters); 
56                         
57                         delete validParameter;
58                         
59                         if (abort == false) {
60                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
61                                 //user has entered their own tree
62                                 if (randomtree == "") { 
63                                         T = globaldata->gTree;
64                                         tmap = globaldata->gTreemap;
65                                         output = new ColumnFile(globaldata->getTreeFile()  +  ".parsimony", itersString);
66                                         sumFile = globaldata->getTreeFile() + ".psummary";
67                                         openOutputFile(sumFile, outSum);
68                                 }else { //user wants random distribution
69                                         savetmap = globaldata->gTreemap;
70                                         getUserInput();
71                                         output = new ColumnFile(randomtree, itersString);
72                                 }
73                                 
74                                 //set users groups to analyze
75                                 util = new SharedUtil();
76                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted");   //sets the groups the user wants to analyze
77                                 util->getCombos(groupComb, globaldata->Groups, numComp);
78                                 
79                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
80                                 
81                                 pars = new Parsimony(tmap);
82                                 counter = 0;
83                                 
84                         }
85                         
86                 }
87
88         }
89         catch(exception& e) {
90                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
91                 exit(1);
92         }
93         catch(...) {
94                 cout << "An unknown error has occurred in the ParsimonyCommand class function ParsimonyCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
95                 exit(1);
96         }
97 }
98
99 //**********************************************************************************************************************
100
101 void ParsimonyCommand::help(){
102         try {
103                 cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n";
104                 cout << "The parsimony command parameters are random, groups and iters.  No parameters are required." << "\n";
105                 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group." << "\n";
106                 cout << "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n";
107                 cout << "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters)." << "\n";
108                 cout << "Example parsimony(random=out, iters=500)." << "\n";
109                 cout << "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony)," << "\n";
110                 cout << "and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual." << "\n";
111                 cout << "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename)." << "\n" << "\n";
112         }
113         catch(exception& e) {
114                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
115                 exit(1);
116         }
117         catch(...) {
118                 cout << "An unknown error has occurred in the ParsimonyCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
119                 exit(1);
120         }       
121 }
122
123
124 /***********************************************************/
125 int ParsimonyCommand::execute() {
126         try {
127         
128                 if (abort == true) { return 0; }
129         
130                 Progress* reading;
131                 reading = new Progress("Comparing to random:", iters);
132                 
133                 //get pscore for users tree
134                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
135                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
136                 rscoreFreq.resize(numComp);  
137                 uscoreFreq.resize(numComp);  
138                 rCumul.resize(numComp);  
139                 uCumul.resize(numComp);  
140                 userTreeScores.resize(numComp);  
141                 UScoreSig.resize(numComp); 
142                                 
143                 if (randomtree == "") {
144                         //get pscores for users trees
145                         for (int i = 0; i < T.size(); i++) {
146                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
147
148                                 //output scores for each combination
149                                 for(int k = 0; k < numComp; k++) {
150
151                                         //update uscoreFreq
152                                         it = uscoreFreq[k].find(userData[k]);
153                                         if (it == uscoreFreq[k].end()) {//new score
154                                                 uscoreFreq[k][userData[k]] = 1;
155                                         }else{ uscoreFreq[k][userData[k]]++; }
156                                         
157                                         //add users score to valid scores
158                                         validScores[userData[k]] = userData[k];
159                                         
160                                         //save score for summary file
161                                         userTreeScores[k].push_back(userData[k]);
162                                 }
163                         }
164                         
165                         //get pscores for random trees
166                         for (int j = 0; j < iters; j++) {
167                                 //create new tree with same num nodes and leaves as users
168                                 randT = new Tree();
169
170                                 //create random relationships between nodes
171                                 randT->assembleRandomTree();
172
173                                 //get pscore of random tree
174                                 randomData = pars->getValues(randT);
175                                         
176                                 for(int r = 0; r < numComp; r++) {
177                                         //add trees pscore to map of scores
178                                         it2 = rscoreFreq[r].find(randomData[r]);
179                                         if (it2 != rscoreFreq[r].end()) {//already have that score
180                                                 rscoreFreq[r][randomData[r]]++;
181                                         }else{//first time we have seen this score
182                                                 rscoreFreq[r][randomData[r]] = 1;
183                                         }
184                         
185                                         //add randoms score to validscores
186                                         validScores[randomData[r]] = randomData[r];
187                                 }
188                                 
189                                 //update progress bar
190                                 reading->update(j);
191                                 
192                                 delete randT;
193                         }
194
195                 }else {
196                         //get pscores for random trees
197                         for (int j = 0; j < iters; j++) {
198                                 //create new tree with same num nodes and leaves as users
199                                 randT = new Tree();
200                                 //create random relationships between nodes
201
202                                 randT->assembleRandomTree();
203
204                                 //get pscore of random tree
205                                 randomData = pars->getValues(randT);
206                         
207                                 for(int r = 0; r < numComp; r++) {
208                                         //add trees pscore to map of scores
209                                         it2 = rscoreFreq[r].find(randomData[r]);
210                                         if (it2 != rscoreFreq[r].end()) {//already have that score
211                                                 rscoreFreq[r][randomData[r]]++;
212                                         }else{//first time we have seen this score
213                                                 rscoreFreq[r][randomData[r]] = 1;
214                                         }
215                         
216                                         //add randoms score to validscores
217                                         validScores[randomData[r]] = randomData[r];
218                                 }
219                                 
220                                 //update progress bar
221                                 reading->update(j);
222                                 
223                                 delete randT;
224                         }
225                 }
226
227                 for(int a = 0; a < numComp; a++) {
228                         float rcumul = 0.0000;
229                         float ucumul = 0.0000;
230                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
231                         for (it = validScores.begin(); it != validScores.end(); it++) { 
232                                 if (randomtree == "") {
233                                         it2 = uscoreFreq[a].find(it->first);
234                                         //user data has that score 
235                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
236                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
237                                         //make uCumul map
238                                         uCumul[a][it->first] = ucumul;
239                                 }
240                         
241                                 //make rscoreFreq map and rCumul
242                                 it2 = rscoreFreq[a].find(it->first);
243                                 //get percentage of random trees with that info
244                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
245                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
246                                 rCumul[a][it->first] = rcumul;
247                         }
248                         
249                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
250                         for (int h = 0; h < userTreeScores[a].size(); h++) {
251                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
252                         }
253                 }
254                 
255                 //finish progress bar
256                 reading->finish();
257                 delete reading;
258
259                 
260                 printParsimonyFile();
261                 if (randomtree == "") { printUSummaryFile(); }
262                 
263                 //reset globaldata's treemap if you just did random distrib
264                 if (randomtree != "") {
265                         //memory leak prevention
266                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
267                         globaldata->gTreemap = savetmap;
268                 }
269                 
270                 //reset groups parameter
271                 globaldata->Groups.clear(); 
272                 
273                 return 0;
274                 
275         }
276         catch(exception& e) {
277                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
278                 exit(1);
279         }
280         catch(...) {
281                 cout << "An unknown error has occurred in the ParsimonyCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
282                 exit(1);
283         }
284 }
285
286 /***********************************************************/
287 void ParsimonyCommand::printParsimonyFile() {
288         try {
289                 vector<double> data;
290                 vector<string> tags;
291                 
292                 if (randomtree == "") {
293                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
294                 }else {
295                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
296                 }
297
298                 for(int a = 0; a < numComp; a++) {
299                         output->initFile(groupComb[a], tags);
300                         //print each line
301                         for (it = validScores.begin(); it != validScores.end(); it++) { 
302                                 if (randomtree == "") {
303                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
304                                 }else{
305                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
306                                 }
307                                 output->output(data);
308                                 data.clear();
309                         } 
310                         output->resetFile();
311                 }
312         }
313         catch(exception& e) {
314                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
315                 exit(1);
316         }
317         catch(...) {
318                 cout << "An unknown error has occurred in the ParsimonyCommand class function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
319                 exit(1);
320         }
321 }
322 /***********************************************************/
323 void ParsimonyCommand::printUSummaryFile() {
324         try {
325                 //column headers
326                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
327                 cout << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
328                 
329                 //format output
330                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
331                 
332                 
333                 //print each line
334                 for (int i = 0; i< T.size(); i++) {
335                         for(int a = 0; a < numComp; a++) {
336                                 if (UScoreSig[a][i] > (1/(float)iters)) {
337                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
338                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
339                                 }else {
340                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
341                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
342                                 }
343                         }
344                 }
345                 
346                 outSum.close();
347         }
348         catch(exception& e) {
349                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
350                 exit(1);
351         }
352         catch(...) {
353                 cout << "An unknown error has occurred in the ParsimonyCommand class function printUSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
354                 exit(1);
355         }
356 }
357
358 /***********************************************************/
359 void ParsimonyCommand::getUserInput() {
360         try {
361         
362                 //create treemap
363                 tmap = new TreeMap();
364
365                 cout << "Please enter the number of groups you would like to analyze: ";
366                 cin >> numGroups;
367                         
368                 int num, count;
369                 count = 1;
370                 numEachGroup.resize(numGroups, 0);  
371                 
372                 for (int i = 1; i <= numGroups; i++) {
373                         cout << "Please enter the number of sequences in group " << i <<  ": ";
374                         cin >> num;
375                                 
376                         //set tmaps seqsPerGroup
377                         tmap->seqsPerGroup[toString(i)] = num;
378                         tmap->namesOfGroups.push_back(toString(i));
379                         
380                         //set tmaps namesOfSeqs
381                         for (int j = 0; j < num; j++) {
382                                 tmap->namesOfSeqs.push_back(toString(count));
383                                 tmap->treemap[toString(count)].groupname = toString(i);
384                                 count++;
385                         }
386                 }
387                 
388                 //clears buffer so next command doesn't have error
389                 string s;       
390                 getline(cin, s);
391                 
392                 //save tmap for later
393                 //memory leak prevention
394                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
395                 globaldata->gTreemap = tmap;
396                 globaldata->Treenames = tmap->namesOfSeqs; 
397                 
398         }
399         catch(exception& e) {
400                 cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
401                 exit(1);
402         }
403         catch(...) {
404                 cout << "An unknown error has occurred in the ParsimonyCommand class function getUserInput. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
405                 exit(1);
406         }
407 }
408
409 /***********************************************************/
410
411