]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
b879395b528b845805c622acd107e637e3af74f8
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ParsimonyCommand::getValidParameters(){  
14         try {
15                 string Array[] =  {"random","groups","iters","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ParsimonyCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 ParsimonyCommand::ParsimonyCommand(){   
26         try {
27                 //initialize outputTypes
28                 vector<string> tempOutNames;
29                 outputTypes["parsimony"] = tempOutNames;
30                 outputTypes["psummary"] = tempOutNames;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> ParsimonyCommand::getRequiredParameters(){       
39         try {
40                 vector<string> myArray;
41                 return myArray;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ParsimonyCommand", "getRequiredParameters");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 vector<string> ParsimonyCommand::getRequiredFiles(){    
50         try {
51                 vector<string> myArray;
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ParsimonyCommand", "getRequiredFiles");
56                 exit(1);
57         }
58 }
59 /***********************************************************/
60 ParsimonyCommand::ParsimonyCommand(string option)  {
61         try {
62                 globaldata = GlobalData::getInstance();
63                 abort = false;
64                 Groups.clear();
65                         
66                 //allow user to run help
67                 if(option == "help") { help(); abort = true; }
68                 
69                 else {
70                         //valid paramters for this command
71                         string Array[] =  {"random","groups","iters","outputdir","inputdir"};
72                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
73                         
74                         OptionParser parser(option);
75                         map<string, string> parameters = parser.getParameters();
76                         
77                         ValidParameters validParameter;
78                 
79                         //check to make sure all parameters are valid for command
80                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
81                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
82                         }
83                         
84                         //initialize outputTypes
85                         vector<string> tempOutNames;
86                         outputTypes["parsimony"] = tempOutNames;
87                         outputTypes["psummary"] = tempOutNames;
88                         
89                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
90                         
91                         //are you trying to use parsimony without reading a tree or saying you want random distribution
92                         if (randomtree == "")  {
93                                 if (globaldata->gTree.size() == 0) {
94                                         m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true;  }
95                         }
96                         
97                         //if the user changes the output directory command factory will send this info to us in the output parameter 
98                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; }
99                         
100                         //check for optional parameter and set defaults
101                         // ...at some point should added some additional type checking...
102                         groups = validParameter.validFile(parameters, "groups", false);                 
103                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
104                         else { 
105                                 m->splitAtDash(groups, Groups);
106                                 globaldata->Groups = Groups;
107                         }
108                                 
109                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
110                         convert(itersString, iters); 
111                                                 
112                         if (abort == false) {
113                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
114                                 //user has entered their own tree
115                                 if (randomtree == "") { 
116                                         T = globaldata->gTree;
117                                         tmap = globaldata->gTreemap;
118                                         
119                                         if(outputDir == "") { outputDir += m->hasPath(globaldata->getTreeFile()); }
120                                         output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony", itersString);
121                                         outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
122                                         outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
123                                         
124                                         sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".psummary";
125                                         m->openOutputFile(sumFile, outSum);
126                                         outputNames.push_back(sumFile);
127                                         outputTypes["psummary"].push_back(sumFile);
128                                 }else { //user wants random distribution
129                                         savetmap = globaldata->gTreemap;
130                                         getUserInput();
131                                         
132                                         if(outputDir == "") { outputDir += m->hasPath(randomtree); }
133                                         output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
134                                         outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
135                                         outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
136                                 }
137                                 
138                                 //set users groups to analyze
139                                 util = new SharedUtil();
140                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
141                                 util->getCombos(groupComb, globaldata->Groups, numComp);
142                                 
143                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
144                                 
145                                 pars = new Parsimony(tmap);
146                                 counter = 0;
147                                 
148                         }
149                         
150                 }
151
152         }
153         catch(exception& e) {
154                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
155                 exit(1);
156         }
157 }
158
159 //**********************************************************************************************************************
160
161 void ParsimonyCommand::help(){
162         try {
163                 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
164                 m->mothurOut("The parsimony command parameters are random, groups and iters.  No parameters are required.\n");
165                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
166                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
167                 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
168                 m->mothurOut("Example parsimony(random=out, iters=500).\n");
169                 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
170                 m->mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
171                 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "ParsimonyCommand", "help");
175                 exit(1);
176         }
177 }
178
179
180 /***********************************************************/
181 int ParsimonyCommand::execute() {
182         try {
183         
184                 if (abort == true) { return 0; }
185         
186                 Progress* reading;
187                 reading = new Progress("Comparing to random:", iters);
188                 
189                 if (m->control_pressed) { 
190                         delete reading; delete pars; delete util; delete output;
191                         if (randomtree == "") {  outSum.close();  }
192                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
193                         globaldata->Groups.clear();
194                         return 0;
195                 }
196                         
197                 
198                 //get pscore for users tree
199                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
200                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
201                 rscoreFreq.resize(numComp);  
202                 uscoreFreq.resize(numComp);  
203                 rCumul.resize(numComp);  
204                 uCumul.resize(numComp);  
205                 userTreeScores.resize(numComp);  
206                 UScoreSig.resize(numComp); 
207                                 
208                 if (randomtree == "") {
209                         //get pscores for users trees
210                         for (int i = 0; i < T.size(); i++) {
211                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
212                                 
213                                 if (m->control_pressed) { 
214                                         delete reading; delete pars; delete util; delete output;
215                                         if (randomtree == "") {  outSum.close();  }
216                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
217                                         globaldata->Groups.clear();
218                                         return 0;
219                                 }
220
221
222                                 //output scores for each combination
223                                 for(int k = 0; k < numComp; k++) {
224
225                                         //update uscoreFreq
226                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
227                                         if (it == uscoreFreq[k].end()) {//new score
228                                                 uscoreFreq[k][userData[k]] = 1;
229                                         }else{ uscoreFreq[k][userData[k]]++; }
230                                         
231                                         //add users score to valid scores
232                                         validScores[userData[k]] = userData[k];
233                                         
234                                         //save score for summary file
235                                         userTreeScores[k].push_back(userData[k]);
236                                 }
237                         }
238                         
239                         //get pscores for random trees
240                         for (int j = 0; j < iters; j++) {
241                                                                 
242                                 //create new tree with same num nodes and leaves as users
243                                 randT = new Tree();
244
245                                 //create random relationships between nodes
246                                 randT->assembleRandomTree();
247
248                                 //get pscore of random tree
249                                 randomData = pars->getValues(randT);
250                                 
251                                 if (m->control_pressed) { 
252                                         delete reading; delete pars; delete util; delete output; delete randT;
253                                         if (randomtree == "") {  outSum.close();  }
254                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
255                                         globaldata->Groups.clear();
256                                         return 0;
257                                 }
258                                         
259                                 for(int r = 0; r < numComp; r++) {
260                                         //add trees pscore to map of scores
261                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
262                                         if (it != rscoreFreq[r].end()) {//already have that score
263                                                 rscoreFreq[r][randomData[r]]++;
264                                         }else{//first time we have seen this score
265                                                 rscoreFreq[r][randomData[r]] = 1;
266                                         }
267                         
268                                         //add randoms score to validscores
269                                         validScores[randomData[r]] = randomData[r];
270                                 }
271                                 
272                                 //update progress bar
273                                 reading->update(j);
274                                 
275                                 delete randT;
276                         }
277
278                 }else {
279                         //get pscores for random trees
280                         for (int j = 0; j < iters; j++) {
281                                                                 
282                                 //create new tree with same num nodes and leaves as users
283                                 randT = new Tree();
284                                 //create random relationships between nodes
285
286                                 randT->assembleRandomTree();
287                                 
288                                 if (m->control_pressed) { 
289                                         delete reading; delete pars; delete util; delete output; delete randT;
290                                         globaldata->gTreemap = savetmap; 
291                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
292                                         globaldata->Groups.clear();
293                                         return 0;
294                                 }
295
296
297                                 //get pscore of random tree
298                                 randomData = pars->getValues(randT);
299                                 
300                                 if (m->control_pressed) { 
301                                         delete reading; delete pars; delete util; delete output; delete randT;
302                                         globaldata->gTreemap = savetmap; 
303                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
304                                         globaldata->Groups.clear();
305                                         return 0;
306                                 }
307                         
308                                 for(int r = 0; r < numComp; r++) {
309                                         //add trees pscore to map of scores
310                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
311                                         if (it != rscoreFreq[r].end()) {//already have that score
312                                                 rscoreFreq[r][randomData[r]]++;
313                                         }else{//first time we have seen this score
314                                                 rscoreFreq[r][randomData[r]] = 1;
315                                         }
316                         
317                                         //add randoms score to validscores
318                                         validScores[randomData[r]] = randomData[r];
319                                 }
320                                 
321                                 //update progress bar
322                                 reading->update(j);
323                                 
324                                 delete randT;
325                         }
326                 }
327
328                 for(int a = 0; a < numComp; a++) {
329                         float rcumul = 0.0000;
330                         float ucumul = 0.0000;
331                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
332                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
333                                 if (randomtree == "") {
334                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
335                                         //user data has that score 
336                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
337                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
338                                         //make uCumul map
339                                         uCumul[a][it->first] = ucumul;
340                                 }
341                         
342                                 //make rscoreFreq map and rCumul
343                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
344                                 //get percentage of random trees with that info
345                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
346                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
347                                 rCumul[a][it->first] = rcumul;
348                         }
349                         
350                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
351                         for (int h = 0; h < userTreeScores[a].size(); h++) {
352                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
353                         }
354                 }
355                 
356                 if (m->control_pressed) { 
357                                 delete reading; delete pars; delete util; delete output;
358                                 if (randomtree == "") {  outSum.close();  }
359                                 else { globaldata->gTreemap = savetmap; }
360                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
361                                 globaldata->Groups.clear();
362                                 return 0;
363                 }
364                 
365                 //finish progress bar
366                 reading->finish();
367                 delete reading;
368
369                 
370                 printParsimonyFile();
371                 if (randomtree == "") { printUSummaryFile(); }
372                 
373                 //reset globaldata's treemap if you just did random distrib
374                 if (randomtree != "") {
375                         //memory leak prevention
376                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
377                         globaldata->gTreemap = savetmap;
378                 }
379                 
380                 //reset groups parameter
381                 globaldata->Groups.clear(); 
382                 
383                 if (m->control_pressed) { 
384                         delete pars; delete util; delete output;
385                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
386                         return 0;
387                 }
388                 
389                 m->mothurOutEndLine();
390                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
391                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
392                 m->mothurOutEndLine();
393
394                 
395                 return 0;
396                 
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "ParsimonyCommand", "execute");
400                 exit(1);
401         }
402 }
403
404 /***********************************************************/
405 void ParsimonyCommand::printParsimonyFile() {
406         try {
407                 vector<double> data;
408                 vector<string> tags;
409                 
410                 if (randomtree == "") {
411                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
412                 }else {
413                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
414                 }
415
416                 for(int a = 0; a < numComp; a++) {
417                         output->initFile(groupComb[a], tags);
418                         //print each line
419                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
420                                 if (randomtree == "") {
421                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
422                                 }else{
423                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
424                                 }
425                                 output->output(data);
426                                 data.clear();
427                         } 
428                         output->resetFile();
429                 }
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
433                 exit(1);
434         }
435 }
436 /***********************************************************/
437 int ParsimonyCommand::printUSummaryFile() {
438         try {
439                 //column headers
440                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
441                 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
442                 
443                 //format output
444                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
445                 
446                 
447                 //print each line
448                 for (int i = 0; i< T.size(); i++) {
449                         for(int a = 0; a < numComp; a++) {
450                                 if (m->control_pressed) {  outSum.close(); return 0; }
451                                 if (UScoreSig[a][i] > (1/(float)iters)) {
452                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
453                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
454                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
455                                 }else {
456                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
457                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
458                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
459                                 }
460                         }
461                 }
462                 
463                 outSum.close();
464                 return 0;
465         }
466         catch(exception& e) {
467                 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
468                 exit(1);
469         }
470 }
471
472 /***********************************************************/
473 void ParsimonyCommand::getUserInput() {
474         try {
475         
476                 //create treemap
477                 tmap = new TreeMap();
478
479                 m->mothurOut("Please enter the number of groups you would like to analyze: ");
480                 cin >> numGroups;
481                 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
482                                 
483                 int num, count;
484                 count = 1;
485                 numEachGroup.resize(numGroups, 0);  
486                 
487                 for (int i = 1; i <= numGroups; i++) {
488                         m->mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
489                         cin >> num;
490                         m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
491                                 
492                         //set tmaps seqsPerGroup
493                         tmap->seqsPerGroup[toString(i)] = num;
494                         tmap->namesOfGroups.push_back(toString(i));
495                         
496                         //set tmaps namesOfSeqs
497                         for (int j = 0; j < num; j++) {
498                                 tmap->namesOfSeqs.push_back(toString(count));
499                                 tmap->treemap[toString(count)].groupname = toString(i);
500                                 count++;
501                         }
502                 }
503                 
504                 //clears buffer so next command doesn't have error
505                 string s;       
506                 getline(cin, s);
507                 
508                 //save tmap for later
509                 //memory leak prevention
510                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
511                 globaldata->gTreemap = tmap;
512                 globaldata->Treenames = tmap->namesOfSeqs; 
513                 
514         }
515         catch(exception& e) {
516                 m->errorOut(e, "ParsimonyCommand", "getUserInput");
517                 exit(1);
518         }
519 }
520
521 /***********************************************************/
522
523