]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option)  {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 Groups.clear();
18                         
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"random","groups","iters","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
38                         
39                         //are you trying to use parsimony without reading a tree or saying you want random distribution
40                         if (randomtree == "")  {
41                                 if (globaldata->gTree.size() == 0) {
42                                         m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true;  }
43                         }
44                         
45                         //if the user changes the output directory command factory will send this info to us in the output parameter 
46                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; }
47                         
48                         //check for optional parameter and set defaults
49                         // ...at some point should added some additional type checking...
50                         groups = validParameter.validFile(parameters, "groups", false);                 
51                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
52                         else { 
53                                 m->splitAtDash(groups, Groups);
54                                 globaldata->Groups = Groups;
55                         }
56                                 
57                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
58                         convert(itersString, iters); 
59                                                 
60                         if (abort == false) {
61                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
62                                 //user has entered their own tree
63                                 if (randomtree == "") { 
64                                         T = globaldata->gTree;
65                                         tmap = globaldata->gTreemap;
66                                         
67                                         if(outputDir == "") { outputDir += m->hasPath(globaldata->getTreeFile()); }
68                                         output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony", itersString);
69                                         outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
70                                         
71                                         sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".psummary";
72                                         m->openOutputFile(sumFile, outSum);
73                                         outputNames.push_back(sumFile);
74                                 }else { //user wants random distribution
75                                         savetmap = globaldata->gTreemap;
76                                         getUserInput();
77                                         
78                                         if(outputDir == "") { outputDir += m->hasPath(randomtree); }
79                                         output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
80                                         outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
81                                 }
82                                 
83                                 //set users groups to analyze
84                                 util = new SharedUtil();
85                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
86                                 util->getCombos(groupComb, globaldata->Groups, numComp);
87                                 
88                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
89                                 
90                                 pars = new Parsimony(tmap);
91                                 counter = 0;
92                                 
93                         }
94                         
95                 }
96
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
100                 exit(1);
101         }
102 }
103
104 //**********************************************************************************************************************
105
106 void ParsimonyCommand::help(){
107         try {
108                 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
109                 m->mothurOut("The parsimony command parameters are random, groups and iters.  No parameters are required.\n");
110                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
111                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
112                 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
113                 m->mothurOut("Example parsimony(random=out, iters=500).\n");
114                 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
115                 m->mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
116                 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "ParsimonyCommand", "help");
120                 exit(1);
121         }
122 }
123
124
125 /***********************************************************/
126 int ParsimonyCommand::execute() {
127         try {
128         
129                 if (abort == true) { return 0; }
130         
131                 Progress* reading;
132                 reading = new Progress("Comparing to random:", iters);
133                 
134                 if (m->control_pressed) { 
135                         delete reading; delete pars; delete util; delete output;
136                         if (randomtree == "") {  outSum.close();  }
137                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
138                         globaldata->Groups.clear();
139                         return 0;
140                 }
141                         
142                 
143                 //get pscore for users tree
144                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
145                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
146                 rscoreFreq.resize(numComp);  
147                 uscoreFreq.resize(numComp);  
148                 rCumul.resize(numComp);  
149                 uCumul.resize(numComp);  
150                 userTreeScores.resize(numComp);  
151                 UScoreSig.resize(numComp); 
152                                 
153                 if (randomtree == "") {
154                         //get pscores for users trees
155                         for (int i = 0; i < T.size(); i++) {
156                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
157                                 
158                                 if (m->control_pressed) { 
159                                         delete reading; delete pars; delete util; delete output;
160                                         if (randomtree == "") {  outSum.close();  }
161                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
162                                         globaldata->Groups.clear();
163                                         return 0;
164                                 }
165
166
167                                 //output scores for each combination
168                                 for(int k = 0; k < numComp; k++) {
169
170                                         //update uscoreFreq
171                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
172                                         if (it == uscoreFreq[k].end()) {//new score
173                                                 uscoreFreq[k][userData[k]] = 1;
174                                         }else{ uscoreFreq[k][userData[k]]++; }
175                                         
176                                         //add users score to valid scores
177                                         validScores[userData[k]] = userData[k];
178                                         
179                                         //save score for summary file
180                                         userTreeScores[k].push_back(userData[k]);
181                                 }
182                         }
183                         
184                         //get pscores for random trees
185                         for (int j = 0; j < iters; j++) {
186                                                                 
187                                 //create new tree with same num nodes and leaves as users
188                                 randT = new Tree();
189
190                                 //create random relationships between nodes
191                                 randT->assembleRandomTree();
192
193                                 //get pscore of random tree
194                                 randomData = pars->getValues(randT);
195                                 
196                                 if (m->control_pressed) { 
197                                         delete reading; delete pars; delete util; delete output; delete randT;
198                                         if (randomtree == "") {  outSum.close();  }
199                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
200                                         globaldata->Groups.clear();
201                                         return 0;
202                                 }
203                                         
204                                 for(int r = 0; r < numComp; r++) {
205                                         //add trees pscore to map of scores
206                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
207                                         if (it != rscoreFreq[r].end()) {//already have that score
208                                                 rscoreFreq[r][randomData[r]]++;
209                                         }else{//first time we have seen this score
210                                                 rscoreFreq[r][randomData[r]] = 1;
211                                         }
212                         
213                                         //add randoms score to validscores
214                                         validScores[randomData[r]] = randomData[r];
215                                 }
216                                 
217                                 //update progress bar
218                                 reading->update(j);
219                                 
220                                 delete randT;
221                         }
222
223                 }else {
224                         //get pscores for random trees
225                         for (int j = 0; j < iters; j++) {
226                                                                 
227                                 //create new tree with same num nodes and leaves as users
228                                 randT = new Tree();
229                                 //create random relationships between nodes
230
231                                 randT->assembleRandomTree();
232                                 
233                                 if (m->control_pressed) { 
234                                         delete reading; delete pars; delete util; delete output; delete randT;
235                                         globaldata->gTreemap = savetmap; 
236                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
237                                         globaldata->Groups.clear();
238                                         return 0;
239                                 }
240
241
242                                 //get pscore of random tree
243                                 randomData = pars->getValues(randT);
244                                 
245                                 if (m->control_pressed) { 
246                                         delete reading; delete pars; delete util; delete output; delete randT;
247                                         globaldata->gTreemap = savetmap; 
248                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
249                                         globaldata->Groups.clear();
250                                         return 0;
251                                 }
252                         
253                                 for(int r = 0; r < numComp; r++) {
254                                         //add trees pscore to map of scores
255                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
256                                         if (it != rscoreFreq[r].end()) {//already have that score
257                                                 rscoreFreq[r][randomData[r]]++;
258                                         }else{//first time we have seen this score
259                                                 rscoreFreq[r][randomData[r]] = 1;
260                                         }
261                         
262                                         //add randoms score to validscores
263                                         validScores[randomData[r]] = randomData[r];
264                                 }
265                                 
266                                 //update progress bar
267                                 reading->update(j);
268                                 
269                                 delete randT;
270                         }
271                 }
272
273                 for(int a = 0; a < numComp; a++) {
274                         float rcumul = 0.0000;
275                         float ucumul = 0.0000;
276                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
277                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
278                                 if (randomtree == "") {
279                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
280                                         //user data has that score 
281                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
282                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
283                                         //make uCumul map
284                                         uCumul[a][it->first] = ucumul;
285                                 }
286                         
287                                 //make rscoreFreq map and rCumul
288                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
289                                 //get percentage of random trees with that info
290                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
291                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
292                                 rCumul[a][it->first] = rcumul;
293                         }
294                         
295                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
296                         for (int h = 0; h < userTreeScores[a].size(); h++) {
297                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
298                         }
299                 }
300                 
301                 if (m->control_pressed) { 
302                                 delete reading; delete pars; delete util; delete output;
303                                 if (randomtree == "") {  outSum.close();  }
304                                 else { globaldata->gTreemap = savetmap; }
305                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
306                                 globaldata->Groups.clear();
307                                 return 0;
308                 }
309                 
310                 //finish progress bar
311                 reading->finish();
312                 delete reading;
313
314                 
315                 printParsimonyFile();
316                 if (randomtree == "") { printUSummaryFile(); }
317                 
318                 //reset globaldata's treemap if you just did random distrib
319                 if (randomtree != "") {
320                         //memory leak prevention
321                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
322                         globaldata->gTreemap = savetmap;
323                 }
324                 
325                 //reset groups parameter
326                 globaldata->Groups.clear(); 
327                 
328                 if (m->control_pressed) { 
329                         delete pars; delete util; delete output;
330                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
331                         return 0;
332                 }
333                 
334                 m->mothurOutEndLine();
335                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
336                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
337                 m->mothurOutEndLine();
338
339                 
340                 return 0;
341                 
342         }
343         catch(exception& e) {
344                 m->errorOut(e, "ParsimonyCommand", "execute");
345                 exit(1);
346         }
347 }
348
349 /***********************************************************/
350 void ParsimonyCommand::printParsimonyFile() {
351         try {
352                 vector<double> data;
353                 vector<string> tags;
354                 
355                 if (randomtree == "") {
356                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
357                 }else {
358                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
359                 }
360
361                 for(int a = 0; a < numComp; a++) {
362                         output->initFile(groupComb[a], tags);
363                         //print each line
364                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
365                                 if (randomtree == "") {
366                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
367                                 }else{
368                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
369                                 }
370                                 output->output(data);
371                                 data.clear();
372                         } 
373                         output->resetFile();
374                 }
375         }
376         catch(exception& e) {
377                 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
378                 exit(1);
379         }
380 }
381 /***********************************************************/
382 int ParsimonyCommand::printUSummaryFile() {
383         try {
384                 //column headers
385                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
386                 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
387                 
388                 //format output
389                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
390                 
391                 
392                 //print each line
393                 for (int i = 0; i< T.size(); i++) {
394                         for(int a = 0; a < numComp; a++) {
395                                 if (m->control_pressed) {  outSum.close(); return 0; }
396                                 if (UScoreSig[a][i] > (1/(float)iters)) {
397                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
398                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
399                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
400                                 }else {
401                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
402                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
403                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
404                                 }
405                         }
406                 }
407                 
408                 outSum.close();
409                 return 0;
410         }
411         catch(exception& e) {
412                 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
413                 exit(1);
414         }
415 }
416
417 /***********************************************************/
418 void ParsimonyCommand::getUserInput() {
419         try {
420         
421                 //create treemap
422                 tmap = new TreeMap();
423
424                 m->mothurOut("Please enter the number of groups you would like to analyze: ");
425                 cin >> numGroups;
426                 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
427                                 
428                 int num, count;
429                 count = 1;
430                 numEachGroup.resize(numGroups, 0);  
431                 
432                 for (int i = 1; i <= numGroups; i++) {
433                         m->mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
434                         cin >> num;
435                         m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
436                                 
437                         //set tmaps seqsPerGroup
438                         tmap->seqsPerGroup[toString(i)] = num;
439                         tmap->namesOfGroups.push_back(toString(i));
440                         
441                         //set tmaps namesOfSeqs
442                         for (int j = 0; j < num; j++) {
443                                 tmap->namesOfSeqs.push_back(toString(count));
444                                 tmap->treemap[toString(count)].groupname = toString(i);
445                                 count++;
446                         }
447                 }
448                 
449                 //clears buffer so next command doesn't have error
450                 string s;       
451                 getline(cin, s);
452                 
453                 //save tmap for later
454                 //memory leak prevention
455                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
456                 globaldata->gTreemap = tmap;
457                 globaldata->Treenames = tmap->namesOfSeqs; 
458                 
459         }
460         catch(exception& e) {
461                 m->errorOut(e, "ParsimonyCommand", "getUserInput");
462                 exit(1);
463         }
464 }
465
466 /***********************************************************/
467
468