]> git.donarmstrong.com Git - mothur.git/blob - summarytaxcommand.cpp
added summary.tax command and fixed bug with root level totals in tax.summary file
[mothur.git] / summarytaxcommand.cpp
1 /*
2  *  summarytaxcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/23/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "summarytaxcommand.h"
11 #include "phylosummary.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummaryTaxCommand::setParameters(){      
15         try {
16                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
19                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
20                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
21                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
22                 
23                 vector<string> myArray;
24                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
25                 return myArray;
26         }
27         catch(exception& e) {
28                 m->errorOut(e, "SummaryTaxCommand", "setParameters");
29                 exit(1);
30         }
31 }
32 //**********************************************************************************************************************
33 string SummaryTaxCommand::getHelpString(){      
34         try {
35                 string helpString = "";
36                 helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n";
37                 helpString += "The summary.tax command parameters are taxonomy, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
38                 helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
39                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
40                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
41                 helpString += "The summary.tax command should be in the following format: \n";
42                 helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n";
43                 helpString += "Note: No spaces between parameter labels (i.e. taxonomy), '=' and parameters (i.e.yourTaxonomyFile).\n"; 
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "SummaryTaxCommand", "getHelpString");
48                 exit(1);
49         }
50 }
51
52 //**********************************************************************************************************************
53 SummaryTaxCommand::SummaryTaxCommand(){ 
54         try {
55                 abort = true; calledHelp = true; 
56                 setParameters();
57                 vector<string> tempOutNames;
58                 outputTypes["summary"] = tempOutNames;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
62                 exit(1);
63         }
64 }
65 //***************************************************************************************************************
66
67 SummaryTaxCommand::SummaryTaxCommand(string option)  {
68         try {
69                 abort = false; calledHelp = false;   
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; calledHelp = true; }
73                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
74                 
75                 else {
76                         vector<string> myArray = setParameters();
77                         
78                         OptionParser parser(option);
79                         map<string,string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         map<string,string>::iterator it;
83                         
84                         //check to make sure all parameters are valid for command
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //if the user changes the input directory command factory will send this info to us in the output parameter 
90                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
91                         if (inputDir == "not found"){   inputDir = "";          }
92                         else {
93                                 string path;
94                                 it = parameters.find("taxonomy");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = m->hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
100                                 }
101                                 
102                                 it = parameters.find("name");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("group");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
116                                 }
117                                 
118                                 it = parameters.find("reftaxonomy");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
124                                 }
125                                 
126                         }
127                         
128                         //initialize outputTypes
129                         vector<string> tempOutNames;
130                         outputTypes["summary"] = tempOutNames;
131                         
132                         //check for required parameters
133                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
134                         if (taxfile == "not open") { abort = true; }
135                         else if (taxfile == "not found") {                              
136                                 taxfile = m->getTaxonomyFile(); 
137                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
138                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
139                         }else { m->setTaxonomyFile(taxfile); }  
140                         
141                         namefile = validParameter.validFile(parameters, "name", true);
142                         if (namefile == "not open") { namefile = ""; abort = true; }
143                         else if (namefile == "not found") { namefile = "";  }   
144                         else { m->setNameFile(namefile); }
145                         
146                         groupfile = validParameter.validFile(parameters, "group", true);
147                         if (groupfile == "not open") { groupfile = ""; abort = true; }
148                         else if (groupfile == "not found") { groupfile = ""; }
149                         else { m->setGroupFile(groupfile); }
150                         
151                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
152                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
153                         else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
154                         
155                         //if the user changes the output directory command factory will send this info to us in the output parameter 
156                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
157                                 outputDir = ""; 
158                                 outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
159                         }
160                 }
161         }
162         catch(exception& e) {
163                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
164                 exit(1);
165         }
166 }
167 //***************************************************************************************************************
168
169 int SummaryTaxCommand::execute(){
170         try{
171                 
172                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
173                 int start = time(NULL);
174                 
175                 PhyloSummary* taxaSum;
176                 if (refTaxonomy != "") {
177                         taxaSum = new PhyloSummary(refTaxonomy, groupfile);
178                 }else {
179                         taxaSum = new PhyloSummary(groupfile);
180                 }
181                 
182                 if (m->control_pressed) { delete taxaSum; return 0; }
183                 
184                 int numSeqs = 0;
185                 if (namefile == "") { numSeqs = taxaSum->summarize(taxfile);  }
186                 else {
187                         map<string, vector<string> > nameMap;
188                         map<string, vector<string> >::iterator itNames;
189                         m->readNames(namefile, nameMap);
190                         
191                         if (m->control_pressed) { delete taxaSum; return 0; }
192                         
193                         ifstream in;
194                         m->openInputFile(taxfile, in);
195                         
196                         //read in users taxonomy file and add sequences to tree
197                         string name, taxon;
198                         
199                         while(!in.eof()){
200                                 in >> name >> taxon; m->gobble(in);
201                                 
202                                 itNames = nameMap.find(name);
203                                 
204                                 if (itNames == nameMap.end()) { 
205                                         m->mothurOut("[ERROR]: " + name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
206                                 }else{
207                                         for (int i = 0; i < itNames->second.size(); i++) { 
208                                                 numSeqs++;
209                                                 taxaSum->addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
210                                         }
211                                         itNames->second.clear();
212                                         nameMap.erase(itNames->first);
213                                 }
214                         }
215                         in.close();
216                 }
217                 
218                 if (m->control_pressed) {  delete taxaSum; return 0; }
219                 
220                 //print summary file
221                 ofstream outTaxTree;
222                 string summaryFile = outputDir + m->getRootName(m->getSimpleName(taxfile)) + "tax.summary";
223                 m->openOutputFile(summaryFile, outTaxTree);
224                 taxaSum->print(outTaxTree);
225                 outTaxTree.close();
226                 
227                 delete taxaSum;
228                 
229                 if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
230                 
231                 m->mothurOutEndLine();
232                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
233                 m->mothurOutEndLine();
234                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
235                 m->mothurOut(summaryFile); m->mothurOutEndLine();       outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
236                 m->mothurOutEndLine();
237                                         
238                 return 0;
239         }
240         catch(exception& e) {
241                 m->errorOut(e, "SummaryTaxCommand", "execute");
242                 exit(1);
243         }
244 }
245 /**************************************************************************************/
246
247