]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
added set.dir command and modified commands to redirect input and output, removed...
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 Groups.clear();
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"random","groups","iters","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
38                         
39                         //are you trying to use parsimony without reading a tree or saying you want random distribution
40                         if (randomtree == "")  {
41                                 if (globaldata->gTree.size() == 0) {
42                                         mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); mothurOutEndLine(); abort = true;  }
43                         }
44                         
45                         //if the user changes the output directory command factory will send this info to us in the output parameter 
46                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; }
47                         
48                         //check for optional parameter and set defaults
49                         // ...at some point should added some additional type checking...
50                         groups = validParameter.validFile(parameters, "groups", false);                 
51                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
52                         else { 
53                                 splitAtDash(groups, Groups);
54                                 globaldata->Groups = Groups;
55                         }
56                                 
57                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
58                         convert(itersString, iters); 
59                                                 
60                         if (abort == false) {
61                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
62                                 //user has entered their own tree
63                                 if (randomtree == "") { 
64                                         T = globaldata->gTree;
65                                         tmap = globaldata->gTreemap;
66                                         
67                                         if(outputDir == "") { outputDir += hasPath(globaldata->getTreeFile()); }
68                                         output = new ColumnFile(outputDir + getSimpleName(globaldata->getTreeFile())  +  ".parsimony", itersString);
69                                         
70                                         sumFile = outputDir + getSimpleName(globaldata->getTreeFile()) + ".psummary";
71                                         openOutputFile(sumFile, outSum);
72                                 }else { //user wants random distribution
73                                         savetmap = globaldata->gTreemap;
74                                         getUserInput();
75                                         
76                                         if(outputDir == "") { outputDir += hasPath(randomtree); }
77                                         output = new ColumnFile(outputDir+ getSimpleName(randomtree), itersString);
78                                 }
79                                 
80                                 //set users groups to analyze
81                                 util = new SharedUtil();
82                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
83                                 util->getCombos(groupComb, globaldata->Groups, numComp);
84                                 
85                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
86                                 
87                                 pars = new Parsimony(tmap);
88                                 counter = 0;
89                                 
90                         }
91                         
92                 }
93
94         }
95         catch(exception& e) {
96                 errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
97                 exit(1);
98         }
99 }
100
101 //**********************************************************************************************************************
102
103 void ParsimonyCommand::help(){
104         try {
105                 mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
106                 mothurOut("The parsimony command parameters are random, groups and iters.  No parameters are required.\n");
107                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
108                 mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
109                 mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
110                 mothurOut("Example parsimony(random=out, iters=500).\n");
111                 mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
112                 mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
113                 mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
114         }
115         catch(exception& e) {
116                 errorOut(e, "ParsimonyCommand", "help");
117                 exit(1);
118         }
119 }
120
121
122 /***********************************************************/
123 int ParsimonyCommand::execute() {
124         try {
125         
126                 if (abort == true) { return 0; }
127         
128                 Progress* reading;
129                 reading = new Progress("Comparing to random:", iters);
130                 
131                 //get pscore for users tree
132                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
133                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
134                 rscoreFreq.resize(numComp);  
135                 uscoreFreq.resize(numComp);  
136                 rCumul.resize(numComp);  
137                 uCumul.resize(numComp);  
138                 userTreeScores.resize(numComp);  
139                 UScoreSig.resize(numComp); 
140                                 
141                 if (randomtree == "") {
142                         //get pscores for users trees
143                         for (int i = 0; i < T.size(); i++) {
144                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
145
146                                 //output scores for each combination
147                                 for(int k = 0; k < numComp; k++) {
148
149                                         //update uscoreFreq
150                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
151                                         if (it == uscoreFreq[k].end()) {//new score
152                                                 uscoreFreq[k][userData[k]] = 1;
153                                         }else{ uscoreFreq[k][userData[k]]++; }
154                                         
155                                         //add users score to valid scores
156                                         validScores[userData[k]] = userData[k];
157                                         
158                                         //save score for summary file
159                                         userTreeScores[k].push_back(userData[k]);
160                                 }
161                         }
162                         
163                         //get pscores for random trees
164                         for (int j = 0; j < iters; j++) {
165                                 //create new tree with same num nodes and leaves as users
166                                 randT = new Tree();
167
168                                 //create random relationships between nodes
169                                 randT->assembleRandomTree();
170
171                                 //get pscore of random tree
172                                 randomData = pars->getValues(randT);
173                                         
174                                 for(int r = 0; r < numComp; r++) {
175                                         //add trees pscore to map of scores
176                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
177                                         if (it != rscoreFreq[r].end()) {//already have that score
178                                                 rscoreFreq[r][randomData[r]]++;
179                                         }else{//first time we have seen this score
180                                                 rscoreFreq[r][randomData[r]] = 1;
181                                         }
182                         
183                                         //add randoms score to validscores
184                                         validScores[randomData[r]] = randomData[r];
185                                 }
186                                 
187                                 //update progress bar
188                                 reading->update(j);
189                                 
190                                 delete randT;
191                         }
192
193                 }else {
194                         //get pscores for random trees
195                         for (int j = 0; j < iters; j++) {
196                                 //create new tree with same num nodes and leaves as users
197                                 randT = new Tree();
198                                 //create random relationships between nodes
199
200                                 randT->assembleRandomTree();
201
202                                 //get pscore of random tree
203                                 randomData = pars->getValues(randT);
204                         
205                                 for(int r = 0; r < numComp; r++) {
206                                         //add trees pscore to map of scores
207                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
208                                         if (it != rscoreFreq[r].end()) {//already have that score
209                                                 rscoreFreq[r][randomData[r]]++;
210                                         }else{//first time we have seen this score
211                                                 rscoreFreq[r][randomData[r]] = 1;
212                                         }
213                         
214                                         //add randoms score to validscores
215                                         validScores[randomData[r]] = randomData[r];
216                                 }
217                                 
218                                 //update progress bar
219                                 reading->update(j);
220                                 
221                                 delete randT;
222                         }
223                 }
224
225                 for(int a = 0; a < numComp; a++) {
226                         float rcumul = 0.0000;
227                         float ucumul = 0.0000;
228                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
229                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
230                                 if (randomtree == "") {
231                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
232                                         //user data has that score 
233                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
234                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
235                                         //make uCumul map
236                                         uCumul[a][it->first] = ucumul;
237                                 }
238                         
239                                 //make rscoreFreq map and rCumul
240                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
241                                 //get percentage of random trees with that info
242                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
243                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
244                                 rCumul[a][it->first] = rcumul;
245                         }
246                         
247                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
248                         for (int h = 0; h < userTreeScores[a].size(); h++) {
249                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
250                         }
251                 }
252                 
253                 //finish progress bar
254                 reading->finish();
255                 delete reading;
256
257                 
258                 printParsimonyFile();
259                 if (randomtree == "") { printUSummaryFile(); }
260                 
261                 //reset globaldata's treemap if you just did random distrib
262                 if (randomtree != "") {
263                         //memory leak prevention
264                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
265                         globaldata->gTreemap = savetmap;
266                 }
267                 
268                 //reset groups parameter
269                 globaldata->Groups.clear(); 
270                 
271                 return 0;
272                 
273         }
274         catch(exception& e) {
275                 errorOut(e, "ParsimonyCommand", "execute");
276                 exit(1);
277         }
278 }
279
280 /***********************************************************/
281 void ParsimonyCommand::printParsimonyFile() {
282         try {
283                 vector<double> data;
284                 vector<string> tags;
285                 
286                 if (randomtree == "") {
287                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
288                 }else {
289                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
290                 }
291
292                 for(int a = 0; a < numComp; a++) {
293                         output->initFile(groupComb[a], tags);
294                         //print each line
295                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
296                                 if (randomtree == "") {
297                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
298                                 }else{
299                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
300                                 }
301                                 output->output(data);
302                                 data.clear();
303                         } 
304                         output->resetFile();
305                 }
306         }
307         catch(exception& e) {
308                 errorOut(e, "ParsimonyCommand", "printParsimonyFile");
309                 exit(1);
310         }
311 }
312 /***********************************************************/
313 void ParsimonyCommand::printUSummaryFile() {
314         try {
315                 //column headers
316                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
317                 mothurOut("Tree#\tGroups\tParsScore\tParsSig"); mothurOutEndLine();
318                 
319                 //format output
320                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
321                 
322                 
323                 //print each line
324                 for (int i = 0; i< T.size(); i++) {
325                         for(int a = 0; a < numComp; a++) {
326                                 if (UScoreSig[a][i] > (1/(float)iters)) {
327                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
328                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
329                                         mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); mothurOutEndLine();
330                                 }else {
331                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
332                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
333                                         mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); mothurOutEndLine();
334                                 }
335                         }
336                 }
337                 
338                 outSum.close();
339         }
340         catch(exception& e) {
341                 errorOut(e, "ParsimonyCommand", "printUSummaryFile");
342                 exit(1);
343         }
344 }
345
346 /***********************************************************/
347 void ParsimonyCommand::getUserInput() {
348         try {
349         
350                 //create treemap
351                 tmap = new TreeMap();
352
353                 mothurOut("Please enter the number of groups you would like to analyze: ");
354                 cin >> numGroups;
355                 mothurOutJustToLog(toString(numGroups)); mothurOutEndLine();
356                                 
357                 int num, count;
358                 count = 1;
359                 numEachGroup.resize(numGroups, 0);  
360                 
361                 for (int i = 1; i <= numGroups; i++) {
362                         mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
363                         cin >> num;
364                         mothurOutJustToLog(toString(num)); mothurOutEndLine();
365                                 
366                         //set tmaps seqsPerGroup
367                         tmap->seqsPerGroup[toString(i)] = num;
368                         tmap->namesOfGroups.push_back(toString(i));
369                         
370                         //set tmaps namesOfSeqs
371                         for (int j = 0; j < num; j++) {
372                                 tmap->namesOfSeqs.push_back(toString(count));
373                                 tmap->treemap[toString(count)].groupname = toString(i);
374                                 count++;
375                         }
376                 }
377                 
378                 //clears buffer so next command doesn't have error
379                 string s;       
380                 getline(cin, s);
381                 
382                 //save tmap for later
383                 //memory leak prevention
384                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
385                 globaldata->gTreemap = tmap;
386                 globaldata->Treenames = tmap->namesOfSeqs; 
387                 
388         }
389         catch(exception& e) {
390                 errorOut(e, "ParsimonyCommand", "getUserInput");
391                 exit(1);
392         }
393 }
394
395 /***********************************************************/
396
397