git.donarmstrong.com Git - mothur.git/blob - summarytaxcommand.cpp
fix to summary.tax for 1.28.0
[mothur.git] / summarytaxcommand.cpp
1 /*
2  *  summarytaxcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/23/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "summarytaxcommand.h"
11 #include "phylosummary.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummaryTaxCommand::setParameters(){      
15         try {
16                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "SummaryTaxCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string SummaryTaxCommand::getHelpString(){      
35         try {
36                 string helpString = "";
37                 helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n";
38                 helpString += "The summary.tax command parameters are taxonomy, count, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
39                 helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
40                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
41         helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
42                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
43                 helpString += "The summary.tax command should be in the following format: \n";
44                 helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n";
45                 helpString += "Note: No spaces between parameter labels (i.e. taxonomy), '=' and parameters (i.e.yourTaxonomyFile).\n"; 
46                 return helpString;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "SummaryTaxCommand", "getHelpString");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 string SummaryTaxCommand::getOutputFileNameTag(string type, string inputName=""){       
55         try {
56         string outputFileName = "";
57                 map<string, vector<string> >::iterator it;
58         
59         //is this a type this command creates
60         it = outputTypes.find(type);
61         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
62         else {
63             if (type == "summary")            {   outputFileName =  "tax.summary";   }
64             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
65         }
66         return outputFileName;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "SummaryTaxCommand", "getOutputFileNameTag");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74 SummaryTaxCommand::SummaryTaxCommand(){ 
75         try {
76                 abort = true; calledHelp = true; 
77                 setParameters();
78                 vector<string> tempOutNames;
79                 outputTypes["summary"] = tempOutNames;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
83                 exit(1);
84         }
85 }
86 //***************************************************************************************************************
87
88 SummaryTaxCommand::SummaryTaxCommand(string option)  {
89         try {
90                 abort = false; calledHelp = false;   
91                 
92                 //allow user to run help
93                 if(option == "help") { help(); abort = true; calledHelp = true; }
94                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
95                 
96                 else {
97                         vector<string> myArray = setParameters();
98                         
99                         OptionParser parser(option);
100                         map<string,string> parameters = parser.getParameters();
101                         
102                         ValidParameters validParameter;
103                         map<string,string>::iterator it;
104                         
105                         //check to make sure all parameters are valid for command
106                         for (it = parameters.begin(); it != parameters.end(); it++) { 
107                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
108                         }
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("taxonomy");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
121                                 }
122                                 
123                                 it = parameters.find("name");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("group");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
137                                 }
138                                 
139                                 it = parameters.find("reftaxonomy");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
145                                 }
146                                 
147                 it = parameters.find("count");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
153                                 }
154
155                         }
156                         
157                         //initialize outputTypes
158                         vector<string> tempOutNames;
159                         outputTypes["summary"] = tempOutNames;
160                         
161                         //check for required parameters
162                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
163                         if (taxfile == "not open") { abort = true; }
164                         else if (taxfile == "not found") {                              
165                                 taxfile = m->getTaxonomyFile(); 
166                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
167                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
168                         }else { m->setTaxonomyFile(taxfile); }  
169                         
170                         namefile = validParameter.validFile(parameters, "name", true);
171                         if (namefile == "not open") { namefile = ""; abort = true; }
172                         else if (namefile == "not found") { namefile = "";  }   
173                         else { m->setNameFile(namefile); }
174                         
175                         groupfile = validParameter.validFile(parameters, "group", true);
176                         if (groupfile == "not open") { groupfile = ""; abort = true; }
177                         else if (groupfile == "not found") { groupfile = ""; }
178                         else { m->setGroupFile(groupfile); }
179             
180             countfile = validParameter.validFile(parameters, "count", true);
181                         if (countfile == "not open") { countfile = ""; abort = true; }
182                         else if (countfile == "not found") { countfile = "";  } 
183                         else { m->setCountTableFile(countfile); }
184             
185             if ((namefile != "") && (countfile != "")) {
186                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
187             }
188                         
189             if ((groupfile != "") && (countfile != "")) {
190                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
191             }
192             
193                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
194                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
195                         else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
196                         
197                         //if the user changes the output directory command factory will send this info to us in the output parameter 
198                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
199                                 outputDir = ""; 
200                                 outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
201                         }
202                         
203             if (countfile == "") {
204                 if (namefile == "") {
205                     vector<string> files; files.push_back(taxfile);
206                     parser.getNameFile(files);
207                 }
208                         }
209                 }
210         }
211         catch(exception& e) {
212                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
213                 exit(1);
214         }
215 }
216 //***************************************************************************************************************
217
218 int SummaryTaxCommand::execute(){
219         try{
220                 
221                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
222                 int start = time(NULL);
223                 
224         GroupMap* groupMap = NULL;
225         CountTable* ct = NULL;
226         if (groupfile != "") {
227             groupMap = new GroupMap(groupfile);
228             groupMap->readMap();
229         }else if (countfile != "") {
230             ct = new CountTable();
231             ct->readTable(countfile);
232         }
233                 
234         PhyloSummary* taxaSum;
235         if (countfile != "") {
236             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
237             else { taxaSum = new PhyloSummary(ct); }
238         }else {
239             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
240             else { taxaSum = new PhyloSummary(groupMap); }
241                 }
242         
243                 if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
244                 
245                 int numSeqs = 0;
246                 if ((namefile == "") || (countfile != "")) { numSeqs = taxaSum->summarize(taxfile);  }
247                 else if (namefile != "") {
248                         map<string, vector<string> > nameMap;
249                         map<string, vector<string> >::iterator itNames;
250                         m->readNames(namefile, nameMap);
251                         
252                         if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
253                         
254                         ifstream in;
255                         m->openInputFile(taxfile, in);
256                         
257                         //read in users taxonomy file and add sequences to tree
258                         string name, taxon;
259                         
260                         while(!in.eof()){
261                 
262                 if (m->control_pressed) { break; }
263                 
264                                 in >> name >> taxon; m->gobble(in);
265                                 
266                                 itNames = nameMap.find(name);
267                                 
268                                 if (itNames == nameMap.end()) { 
269                                         m->mothurOut("[ERROR]: " + name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
270                                 }else{
271                                         for (int i = 0; i < itNames->second.size(); i++) { 
272                                                 numSeqs++;
273                                                 taxaSum->addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
274                                         }
275                                         itNames->second.clear();
276                                         nameMap.erase(itNames->first);
277                                 }
278                         }
279                         in.close();
280                 }else { numSeqs = taxaSum->summarize(taxfile);  }
281                 
282                 if (m->control_pressed) {  if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
283                 
284                 //print summary file
285                 ofstream outTaxTree;
286                 string summaryFile = outputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("summary");
287                 m->openOutputFile(summaryFile, outTaxTree);
288                 taxaSum->print(outTaxTree);
289                 outTaxTree.close();
290                 
291                 delete taxaSum;
292         if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; }
293                 
294                 if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
295                 
296                 m->mothurOutEndLine();
297                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
298                 m->mothurOutEndLine();
299                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
300                 m->mothurOut(summaryFile); m->mothurOutEndLine();       outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
301                 m->mothurOutEndLine();
302                                         
303                 return 0;
304         }
305         catch(exception& e) {
306                 m->errorOut(e, "SummaryTaxCommand", "execute");
307                 exit(1);
308         }
309 }
310 /**************************************************************************************/
311
312