]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
fixed craig nelsons weighted bug and paralellized parsimony
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ParsimonyCommand::getValidParameters(){  
14         try {
15                 string Array[] =  {"random","groups","iters","processors","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ParsimonyCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 ParsimonyCommand::ParsimonyCommand(){   
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["parsimony"] = tempOutNames;
31                 outputTypes["psummary"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> ParsimonyCommand::getRequiredParameters(){       
40         try {
41                 vector<string> myArray;
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ParsimonyCommand", "getRequiredParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 vector<string> ParsimonyCommand::getRequiredFiles(){    
51         try {
52                 vector<string> myArray;
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "ParsimonyCommand", "getRequiredFiles");
57                 exit(1);
58         }
59 }
60 /***********************************************************/
61 ParsimonyCommand::ParsimonyCommand(string option)  {
62         try {
63                 globaldata = GlobalData::getInstance();
64                 abort = false;
65                 Groups.clear();
66                         
67                 //allow user to run help
68                 if(option == "help") { help(); abort = true; }
69                 
70                 else {
71                         //valid paramters for this command
72                         string Array[] =  {"random","groups","processors","iters","outputdir","inputdir"};
73                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
74                         
75                         OptionParser parser(option);
76                         map<string, string> parameters = parser.getParameters();
77                         
78                         ValidParameters validParameter;
79                 
80                         //check to make sure all parameters are valid for command
81                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
82                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
83                         }
84                         
85                         //initialize outputTypes
86                         vector<string> tempOutNames;
87                         outputTypes["parsimony"] = tempOutNames;
88                         outputTypes["psummary"] = tempOutNames;
89                         
90                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
91                         
92                         //are you trying to use parsimony without reading a tree or saying you want random distribution
93                         if (randomtree == "")  {
94                                 if (globaldata->gTree.size() == 0) {
95                                         m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true;  }
96                         }
97                         
98                         //if the user changes the output directory command factory will send this info to us in the output parameter 
99                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; if (randomtree == "")  { outputDir += m->hasPath(globaldata->inputFileName); } }
100                         
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         groups = validParameter.validFile(parameters, "groups", false);                 
104                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
105                         else { 
106                                 m->splitAtDash(groups, Groups);
107                                 globaldata->Groups = Groups;
108                         }
109                                 
110                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
111                         convert(itersString, iters); 
112                         
113                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = "1";                             }
114                         convert(temp, processors); 
115                                                 
116                         if (abort == false) {
117                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
118                                 //user has entered their own tree
119                                 if (randomtree == "") { 
120                                         T = globaldata->gTree;
121                                         tmap = globaldata->gTreemap;
122                                         
123                                         if(outputDir == "") { outputDir += m->hasPath(globaldata->getTreeFile()); }
124                                         output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony", itersString);
125                                         outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
126                                         outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
127                                         
128                                         sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".psummary";
129                                         m->openOutputFile(sumFile, outSum);
130                                         outputNames.push_back(sumFile);
131                                         outputTypes["psummary"].push_back(sumFile);
132                                 }else { //user wants random distribution
133                                         savetmap = globaldata->gTreemap;
134                                         getUserInput();
135                                         
136                                         if(outputDir == "") { outputDir += m->hasPath(randomtree); }
137                                         output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
138                                         outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
139                                         outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
140                                 }
141                                 
142                                 //set users groups to analyze
143                                 util = new SharedUtil();
144                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
145                                 util->getCombos(groupComb, globaldata->Groups, numComp);
146                                 
147                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
148                                 
149                                 pars = new Parsimony(tmap);
150                                 counter = 0;
151                                 
152                         }
153                         
154                 }
155
156         }
157         catch(exception& e) {
158                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
159                 exit(1);
160         }
161 }
162
163 //**********************************************************************************************************************
164
165 void ParsimonyCommand::help(){
166         try {
167                 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
168                 m->mothurOut("The parsimony command parameters are random, groups, processors and iters.  No parameters are required.\n");
169                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
170                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
171                 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
172                 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
173                 m->mothurOut("Example parsimony(random=out, iters=500).\n");
174                 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
175                 m->mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
176                 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
177         }
178         catch(exception& e) {
179                 m->errorOut(e, "ParsimonyCommand", "help");
180                 exit(1);
181         }
182 }
183
184
185 /***********************************************************/
186 int ParsimonyCommand::execute() {
187         try {
188         
189                 if (abort == true) { return 0; }
190         
191                 Progress* reading;
192                 reading = new Progress("Comparing to random:", iters);
193                 
194                 if (m->control_pressed) { 
195                         delete reading; delete pars; delete util; delete output;
196                         if (randomtree == "") {  outSum.close();  }
197                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
198                         globaldata->Groups.clear();
199                         return 0;
200                 }
201                         
202                 
203                 //get pscore for users tree
204                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
205                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
206                 rscoreFreq.resize(numComp);  
207                 uscoreFreq.resize(numComp);  
208                 rCumul.resize(numComp);  
209                 uCumul.resize(numComp);  
210                 userTreeScores.resize(numComp);  
211                 UScoreSig.resize(numComp); 
212                                 
213                 if (randomtree == "") {
214                         //get pscores for users trees
215                         for (int i = 0; i < T.size(); i++) {
216                                 userData = pars->getValues(T[i], processors, outputDir);  //data = AB, AC, BC, ABC.
217                                 
218                                 if (m->control_pressed) { 
219                                         delete reading; delete pars; delete util; delete output;
220                                         if (randomtree == "") {  outSum.close();  }
221                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
222                                         globaldata->Groups.clear();
223                                         return 0;
224                                 }
225
226
227                                 //output scores for each combination
228                                 for(int k = 0; k < numComp; k++) {
229
230                                         //update uscoreFreq
231                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
232                                         if (it == uscoreFreq[k].end()) {//new score
233                                                 uscoreFreq[k][userData[k]] = 1;
234                                         }else{ uscoreFreq[k][userData[k]]++; }
235                                         
236                                         //add users score to valid scores
237                                         validScores[userData[k]] = userData[k];
238                                         
239                                         //save score for summary file
240                                         userTreeScores[k].push_back(userData[k]);
241                                 }
242                         }
243                         
244                         //get pscores for random trees
245                         for (int j = 0; j < iters; j++) {
246                                                                 
247                                 //create new tree with same num nodes and leaves as users
248                                 randT = new Tree();
249
250                                 //create random relationships between nodes
251                                 randT->assembleRandomTree();
252
253                                 //get pscore of random tree
254                                 randomData = pars->getValues(randT, processors, outputDir);
255                                 
256                                 if (m->control_pressed) { 
257                                         delete reading; delete pars; delete util; delete output; delete randT;
258                                         if (randomtree == "") {  outSum.close();  }
259                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
260                                         globaldata->Groups.clear();
261                                         return 0;
262                                 }
263                                         
264                                 for(int r = 0; r < numComp; r++) {
265                                         //add trees pscore to map of scores
266                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
267                                         if (it != rscoreFreq[r].end()) {//already have that score
268                                                 rscoreFreq[r][randomData[r]]++;
269                                         }else{//first time we have seen this score
270                                                 rscoreFreq[r][randomData[r]] = 1;
271                                         }
272                         
273                                         //add randoms score to validscores
274                                         validScores[randomData[r]] = randomData[r];
275                                 }
276                                 
277                                 //update progress bar
278                                 reading->update(j);
279                                 
280                                 delete randT;
281                         }
282
283                 }else {
284                         //get pscores for random trees
285                         for (int j = 0; j < iters; j++) {
286                                                                 
287                                 //create new tree with same num nodes and leaves as users
288                                 randT = new Tree();
289                                 //create random relationships between nodes
290
291                                 randT->assembleRandomTree();
292                                 
293                                 if (m->control_pressed) { 
294                                         delete reading; delete pars; delete util; delete output; delete randT;
295                                         globaldata->gTreemap = savetmap; 
296                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
297                                         globaldata->Groups.clear();
298                                         return 0;
299                                 }
300
301
302                                 //get pscore of random tree
303                                 randomData = pars->getValues(randT, processors, outputDir);
304                                 
305                                 if (m->control_pressed) { 
306                                         delete reading; delete pars; delete util; delete output; delete randT;
307                                         globaldata->gTreemap = savetmap; 
308                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
309                                         globaldata->Groups.clear();
310                                         return 0;
311                                 }
312                         
313                                 for(int r = 0; r < numComp; r++) {
314                                         //add trees pscore to map of scores
315                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
316                                         if (it != rscoreFreq[r].end()) {//already have that score
317                                                 rscoreFreq[r][randomData[r]]++;
318                                         }else{//first time we have seen this score
319                                                 rscoreFreq[r][randomData[r]] = 1;
320                                         }
321                         
322                                         //add randoms score to validscores
323                                         validScores[randomData[r]] = randomData[r];
324                                 }
325                                 
326                                 //update progress bar
327                                 reading->update(j);
328                                 
329                                 delete randT;
330                         }
331                 }
332
333                 for(int a = 0; a < numComp; a++) {
334                         float rcumul = 0.0000;
335                         float ucumul = 0.0000;
336                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
337                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
338                                 if (randomtree == "") {
339                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
340                                         //user data has that score 
341                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
342                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
343                                         //make uCumul map
344                                         uCumul[a][it->first] = ucumul;
345                                 }
346                         
347                                 //make rscoreFreq map and rCumul
348                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
349                                 //get percentage of random trees with that info
350                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
351                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
352                                 rCumul[a][it->first] = rcumul;
353                         }
354                         
355                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
356                         for (int h = 0; h < userTreeScores[a].size(); h++) {
357                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
358                         }
359                 }
360                 
361                 if (m->control_pressed) { 
362                                 delete reading; delete pars; delete util; delete output;
363                                 if (randomtree == "") {  outSum.close();  }
364                                 else { globaldata->gTreemap = savetmap; }
365                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
366                                 globaldata->Groups.clear();
367                                 return 0;
368                 }
369                 
370                 //finish progress bar
371                 reading->finish();
372                 delete reading;
373
374                 
375                 printParsimonyFile();
376                 if (randomtree == "") { printUSummaryFile(); }
377                 
378                 //reset globaldata's treemap if you just did random distrib
379                 if (randomtree != "") {
380                         //memory leak prevention
381                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
382                         globaldata->gTreemap = savetmap;
383                 }
384                 
385                 //reset groups parameter
386                 globaldata->Groups.clear(); 
387                 
388                 if (m->control_pressed) { 
389                         delete pars; delete util; delete output;
390                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
391                         return 0;
392                 }
393                 
394                 m->mothurOutEndLine();
395                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
396                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
397                 m->mothurOutEndLine();
398
399                 
400                 return 0;
401                 
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "ParsimonyCommand", "execute");
405                 exit(1);
406         }
407 }
408
409 /***********************************************************/
410 void ParsimonyCommand::printParsimonyFile() {
411         try {
412                 vector<double> data;
413                 vector<string> tags;
414                 
415                 if (randomtree == "") {
416                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
417                 }else {
418                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
419                 }
420
421                 for(int a = 0; a < numComp; a++) {
422                         output->initFile(groupComb[a], tags);
423                         //print each line
424                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
425                                 if (randomtree == "") {
426                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
427                                 }else{
428                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
429                                 }
430                                 output->output(data);
431                                 data.clear();
432                         } 
433                         output->resetFile();
434                 }
435         }
436         catch(exception& e) {
437                 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
438                 exit(1);
439         }
440 }
441 /***********************************************************/
442 int ParsimonyCommand::printUSummaryFile() {
443         try {
444                 //column headers
445                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
446                 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
447                 
448                 //format output
449                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
450                 
451                 
452                 //print each line
453                 for (int i = 0; i< T.size(); i++) {
454                         for(int a = 0; a < numComp; a++) {
455                                 if (m->control_pressed) {  outSum.close(); return 0; }
456                                 if (UScoreSig[a][i] > (1/(float)iters)) {
457                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
458                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
459                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
460                                 }else {
461                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
462                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
463                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
464                                 }
465                         }
466                 }
467                 
468                 outSum.close();
469                 return 0;
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
473                 exit(1);
474         }
475 }
476
477 /***********************************************************/
478 void ParsimonyCommand::getUserInput() {
479         try {
480         
481                 //create treemap
482                 tmap = new TreeMap();
483
484                 m->mothurOut("Please enter the number of groups you would like to analyze: ");
485                 cin >> numGroups;
486                 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
487                                 
488                 int num, count;
489                 count = 1;
490                 numEachGroup.resize(numGroups, 0);  
491                 
492                 for (int i = 1; i <= numGroups; i++) {
493                         m->mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
494                         cin >> num;
495                         m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
496                                 
497                         //set tmaps seqsPerGroup
498                         tmap->seqsPerGroup[toString(i)] = num;
499                         tmap->namesOfGroups.push_back(toString(i));
500                         
501                         //set tmaps namesOfSeqs
502                         for (int j = 0; j < num; j++) {
503                                 tmap->namesOfSeqs.push_back(toString(count));
504                                 tmap->treemap[toString(count)].groupname = toString(i);
505                                 count++;
506                         }
507                 }
508                 
509                 //clears buffer so next command doesn't have error
510                 string s;       
511                 getline(cin, s);
512                 
513                 //save tmap for later
514                 //memory leak prevention
515                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
516                 globaldata->gTreemap = tmap;
517                 globaldata->Treenames = tmap->namesOfSeqs; 
518                 
519         }
520         catch(exception& e) {
521                 m->errorOut(e, "ParsimonyCommand", "getUserInput");
522                 exit(1);
523         }
524 }
525
526 /***********************************************************/
527
528