]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
c0de504f32f379651d15fc95767821ec9abd56c7
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ParsimonyCommand::getValidParameters(){  
14         try {
15                 string Array[] =  {"random","groups","iters","processors","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ParsimonyCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 ParsimonyCommand::ParsimonyCommand(){   
26         try {
27                 abort = true; calledHelp = true; 
28                 vector<string> tempOutNames;
29                 outputTypes["parsimony"] = tempOutNames;
30                 outputTypes["psummary"] = tempOutNames;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> ParsimonyCommand::getRequiredParameters(){       
39         try {
40                 vector<string> myArray;
41                 return myArray;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ParsimonyCommand", "getRequiredParameters");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 vector<string> ParsimonyCommand::getRequiredFiles(){    
50         try {
51                 vector<string> myArray;
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ParsimonyCommand", "getRequiredFiles");
56                 exit(1);
57         }
58 }
59 /***********************************************************/
60 ParsimonyCommand::ParsimonyCommand(string option)  {
61         try {
62                 globaldata = GlobalData::getInstance();
63                 abort = false; calledHelp = false;   
64                 Groups.clear();
65                         
66                 //allow user to run help
67                 if(option == "help") { help(); abort = true; calledHelp = true; }
68                 
69                 else {
70                         //valid paramters for this command
71                         string Array[] =  {"random","groups","processors","iters","outputdir","inputdir"};
72                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
73                         
74                         OptionParser parser(option);
75                         map<string, string> parameters = parser.getParameters();
76                         
77                         ValidParameters validParameter;
78                 
79                         //check to make sure all parameters are valid for command
80                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
81                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
82                         }
83                         
84                         //initialize outputTypes
85                         vector<string> tempOutNames;
86                         outputTypes["parsimony"] = tempOutNames;
87                         outputTypes["psummary"] = tempOutNames;
88                         
89                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
90                         
91                         //are you trying to use parsimony without reading a tree or saying you want random distribution
92                         if (randomtree == "")  {
93                                 if (globaldata->gTree.size() == 0) {
94                                         m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true;  }
95                         }
96                         
97                         //if the user changes the output directory command factory will send this info to us in the output parameter 
98                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; if (randomtree == "")  { outputDir += m->hasPath(globaldata->inputFileName); } }
99                         
100                         //check for optional parameter and set defaults
101                         // ...at some point should added some additional type checking...
102                         groups = validParameter.validFile(parameters, "groups", false);                 
103                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
104                         else { 
105                                 m->splitAtDash(groups, Groups);
106                                 globaldata->Groups = Groups;
107                         }
108                                 
109                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
110                         convert(itersString, iters); 
111                         
112                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = "1";                             }
113                         convert(temp, processors); 
114                                                 
115                         if (abort == false) {
116                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
117                                 //user has entered their own tree
118                                 if (randomtree == "") { 
119                                         T = globaldata->gTree;
120                                         tmap = globaldata->gTreemap;
121                                         
122                                         if(outputDir == "") { outputDir += m->hasPath(globaldata->getTreeFile()); }
123                                         output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony", itersString);
124                                         outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
125                                         outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
126                                         
127                                         sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".psummary";
128                                         m->openOutputFile(sumFile, outSum);
129                                         outputNames.push_back(sumFile);
130                                         outputTypes["psummary"].push_back(sumFile);
131                                 }else { //user wants random distribution
132                                         savetmap = globaldata->gTreemap;
133                                         getUserInput();
134                                         
135                                         if(outputDir == "") { outputDir += m->hasPath(randomtree); }
136                                         output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
137                                         outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
138                                         outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
139                                 }
140                                 
141                                 //set users groups to analyze
142                                 util = new SharedUtil();
143                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
144                                 util->getCombos(groupComb, globaldata->Groups, numComp);
145                                 
146                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
147                                 
148                                 pars = new Parsimony(tmap);
149                                 counter = 0;
150                                 
151                         }
152                         
153                 }
154
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
158                 exit(1);
159         }
160 }
161
162 //**********************************************************************************************************************
163
164 void ParsimonyCommand::help(){
165         try {
166                 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
167                 m->mothurOut("The parsimony command parameters are random, groups, processors and iters.  No parameters are required.\n");
168                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
169                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
170                 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
171                 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
172                 m->mothurOut("Example parsimony(random=out, iters=500).\n");
173                 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
174                 m->mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
175                 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "ParsimonyCommand", "help");
179                 exit(1);
180         }
181 }
182
183
184 /***********************************************************/
185 int ParsimonyCommand::execute() {
186         try {
187         
188                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
189         
190                 Progress* reading;
191                 reading = new Progress("Comparing to random:", iters);
192                 
193                 if (m->control_pressed) { 
194                         delete reading; delete pars; delete util; delete output;
195                         if (randomtree == "") {  outSum.close();  }
196                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
197                         globaldata->Groups.clear();
198                         return 0;
199                 }
200                         
201                 
202                 //get pscore for users tree
203                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
204                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
205                 rscoreFreq.resize(numComp);  
206                 uscoreFreq.resize(numComp);  
207                 rCumul.resize(numComp);  
208                 uCumul.resize(numComp);  
209                 userTreeScores.resize(numComp);  
210                 UScoreSig.resize(numComp); 
211                                 
212                 if (randomtree == "") {
213                         //get pscores for users trees
214                         for (int i = 0; i < T.size(); i++) {
215                                 userData = pars->getValues(T[i], processors, outputDir);  //data = AB, AC, BC, ABC.
216                                 
217                                 if (m->control_pressed) { 
218                                         delete reading; delete pars; delete util; delete output;
219                                         if (randomtree == "") {  outSum.close();  }
220                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
221                                         globaldata->Groups.clear();
222                                         return 0;
223                                 }
224
225
226                                 //output scores for each combination
227                                 for(int k = 0; k < numComp; k++) {
228
229                                         //update uscoreFreq
230                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
231                                         if (it == uscoreFreq[k].end()) {//new score
232                                                 uscoreFreq[k][userData[k]] = 1;
233                                         }else{ uscoreFreq[k][userData[k]]++; }
234                                         
235                                         //add users score to valid scores
236                                         validScores[userData[k]] = userData[k];
237                                         
238                                         //save score for summary file
239                                         userTreeScores[k].push_back(userData[k]);
240                                 }
241                         }
242                         
243                         //get pscores for random trees
244                         for (int j = 0; j < iters; j++) {
245                                                                 
246                                 //create new tree with same num nodes and leaves as users
247                                 randT = new Tree();
248
249                                 //create random relationships between nodes
250                                 randT->assembleRandomTree();
251
252                                 //get pscore of random tree
253                                 randomData = pars->getValues(randT, processors, outputDir);
254                                 
255                                 if (m->control_pressed) { 
256                                         delete reading; delete pars; delete util; delete output; delete randT;
257                                         if (randomtree == "") {  outSum.close();  }
258                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
259                                         globaldata->Groups.clear();
260                                         return 0;
261                                 }
262                                         
263                                 for(int r = 0; r < numComp; r++) {
264                                         //add trees pscore to map of scores
265                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
266                                         if (it != rscoreFreq[r].end()) {//already have that score
267                                                 rscoreFreq[r][randomData[r]]++;
268                                         }else{//first time we have seen this score
269                                                 rscoreFreq[r][randomData[r]] = 1;
270                                         }
271                         
272                                         //add randoms score to validscores
273                                         validScores[randomData[r]] = randomData[r];
274                                 }
275                                 
276                                 //update progress bar
277                                 reading->update(j);
278                                 
279                                 delete randT;
280                         }
281
282                 }else {
283                         //get pscores for random trees
284                         for (int j = 0; j < iters; j++) {
285                                                                 
286                                 //create new tree with same num nodes and leaves as users
287                                 randT = new Tree();
288                                 //create random relationships between nodes
289
290                                 randT->assembleRandomTree();
291                                 
292                                 if (m->control_pressed) { 
293                                         delete reading; delete pars; delete util; delete output; delete randT;
294                                         globaldata->gTreemap = savetmap; 
295                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
296                                         globaldata->Groups.clear();
297                                         return 0;
298                                 }
299
300
301                                 //get pscore of random tree
302                                 randomData = pars->getValues(randT, processors, outputDir);
303                                 
304                                 if (m->control_pressed) { 
305                                         delete reading; delete pars; delete util; delete output; delete randT;
306                                         globaldata->gTreemap = savetmap; 
307                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
308                                         globaldata->Groups.clear();
309                                         return 0;
310                                 }
311                         
312                                 for(int r = 0; r < numComp; r++) {
313                                         //add trees pscore to map of scores
314                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
315                                         if (it != rscoreFreq[r].end()) {//already have that score
316                                                 rscoreFreq[r][randomData[r]]++;
317                                         }else{//first time we have seen this score
318                                                 rscoreFreq[r][randomData[r]] = 1;
319                                         }
320                         
321                                         //add randoms score to validscores
322                                         validScores[randomData[r]] = randomData[r];
323                                 }
324                                 
325                                 //update progress bar
326                                 reading->update(j);
327                                 
328                                 delete randT;
329                         }
330                 }
331
332                 for(int a = 0; a < numComp; a++) {
333                         float rcumul = 0.0000;
334                         float ucumul = 0.0000;
335                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
336                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
337                                 if (randomtree == "") {
338                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
339                                         //user data has that score 
340                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
341                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
342                                         //make uCumul map
343                                         uCumul[a][it->first] = ucumul;
344                                 }
345                         
346                                 //make rscoreFreq map and rCumul
347                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
348                                 //get percentage of random trees with that info
349                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
350                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
351                                 rCumul[a][it->first] = rcumul;
352                         }
353                         
354                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
355                         for (int h = 0; h < userTreeScores[a].size(); h++) {
356                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
357                         }
358                 }
359                 
360                 if (m->control_pressed) { 
361                                 delete reading; delete pars; delete util; delete output;
362                                 if (randomtree == "") {  outSum.close();  }
363                                 else { globaldata->gTreemap = savetmap; }
364                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
365                                 globaldata->Groups.clear();
366                                 return 0;
367                 }
368                 
369                 //finish progress bar
370                 reading->finish();
371                 delete reading;
372
373                 
374                 printParsimonyFile();
375                 if (randomtree == "") { printUSummaryFile(); }
376                 
377                 //reset globaldata's treemap if you just did random distrib
378                 if (randomtree != "") {
379                         //memory leak prevention
380                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
381                         globaldata->gTreemap = savetmap;
382                 }
383                 
384                 //reset groups parameter
385                 globaldata->Groups.clear(); 
386                 
387                 if (m->control_pressed) { 
388                         delete pars; delete util; delete output;
389                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
390                         return 0;
391                 }
392                 
393                 m->mothurOutEndLine();
394                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
395                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
396                 m->mothurOutEndLine();
397
398                 
399                 return 0;
400                 
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "ParsimonyCommand", "execute");
404                 exit(1);
405         }
406 }
407
408 /***********************************************************/
409 void ParsimonyCommand::printParsimonyFile() {
410         try {
411                 vector<double> data;
412                 vector<string> tags;
413                 
414                 if (randomtree == "") {
415                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
416                 }else {
417                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
418                 }
419
420                 for(int a = 0; a < numComp; a++) {
421                         output->initFile(groupComb[a], tags);
422                         //print each line
423                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
424                                 if (randomtree == "") {
425                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
426                                 }else{
427                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
428                                 }
429                                 output->output(data);
430                                 data.clear();
431                         } 
432                         output->resetFile();
433                 }
434         }
435         catch(exception& e) {
436                 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
437                 exit(1);
438         }
439 }
440 /***********************************************************/
441 int ParsimonyCommand::printUSummaryFile() {
442         try {
443                 //column headers
444                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
445                 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
446                 
447                 //format output
448                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
449                 
450                 
451                 //print each line
452                 for (int i = 0; i< T.size(); i++) {
453                         for(int a = 0; a < numComp; a++) {
454                                 if (m->control_pressed) {  outSum.close(); return 0; }
455                                 if (UScoreSig[a][i] > (1/(float)iters)) {
456                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
457                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
458                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
459                                 }else {
460                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
461                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
462                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
463                                 }
464                         }
465                 }
466                 
467                 outSum.close();
468                 return 0;
469         }
470         catch(exception& e) {
471                 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
472                 exit(1);
473         }
474 }
475
476 /***********************************************************/
477 void ParsimonyCommand::getUserInput() {
478         try {
479         
480                 //create treemap
481                 tmap = new TreeMap();
482
483                 m->mothurOut("Please enter the number of groups you would like to analyze: ");
484                 cin >> numGroups;
485                 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
486                                 
487                 int num, count;
488                 count = 1;
489                 numEachGroup.resize(numGroups, 0);  
490                 
491                 for (int i = 1; i <= numGroups; i++) {
492                         m->mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
493                         cin >> num;
494                         m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
495                                 
496                         //set tmaps seqsPerGroup
497                         tmap->seqsPerGroup[toString(i)] = num;
498                         tmap->namesOfGroups.push_back(toString(i));
499                         
500                         //set tmaps namesOfSeqs
501                         for (int j = 0; j < num; j++) {
502                                 tmap->namesOfSeqs.push_back(toString(count));
503                                 tmap->treemap[toString(count)].groupname = toString(i);
504                                 count++;
505                         }
506                 }
507                 
508                 //clears buffer so next command doesn't have error
509                 string s;       
510                 getline(cin, s);
511                 
512                 //save tmap for later
513                 //memory leak prevention
514                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
515                 globaldata->gTreemap = tmap;
516                 globaldata->Treenames = tmap->namesOfSeqs; 
517                 
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "ParsimonyCommand", "getUserInput");
521                 exit(1);
522         }
523 }
524
525 /***********************************************************/
526
527