]> git.donarmstrong.com Git - mothur.git/blob - summarytaxcommand.cpp
fixes while testing 1.33.0
[mothur.git] / summarytaxcommand.cpp
1 /*
2  *  summarytaxcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/23/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "summarytaxcommand.h"
11 #include "phylosummary.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummaryTaxCommand::setParameters(){      
15         try {
16                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(ptaxonomy);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
20                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy);
21         CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund);
22
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SummaryTaxCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SummaryTaxCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n";
40                 helpString += "The summary.tax command parameters are taxonomy, count, group, name and relabund. taxonomy is required, unless you have a valid current taxonomy file.\n";
41                 helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
42                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
43         helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
44                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
45         helpString += "The relabund parameter allows you to indicate you want the summary file values to be relative abundances rather than raw abundances. Default=F. \n";
46                 helpString += "The summary.tax command should be in the following format: \n";
47                 helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n";
48                 helpString += "Note: No spaces between parameter labels (i.e. taxonomy), '=' and parameters (i.e.yourTaxonomyFile).\n"; 
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "SummaryTaxCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string SummaryTaxCommand::getOutputPattern(string type) {
58     try {
59         string pattern = "";
60         
61         if (type == "summary") {  pattern = "[filename],tax.summary"; } 
62         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
63         
64         return pattern;
65     }
66     catch(exception& e) {
67         m->errorOut(e, "SummaryTaxCommand", "getOutputPattern");
68         exit(1);
69     }
70 }
71 //**********************************************************************************************************************
72 SummaryTaxCommand::SummaryTaxCommand(){ 
73         try {
74                 abort = true; calledHelp = true; 
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["summary"] = tempOutNames;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
81                 exit(1);
82         }
83 }
84 //***************************************************************************************************************
85
86 SummaryTaxCommand::SummaryTaxCommand(string option)  {
87         try {
88                 abort = false; calledHelp = false;   
89                 
90                 //allow user to run help
91                 if(option == "help") { help(); abort = true; calledHelp = true; }
92                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
93                 
94                 else {
95                         vector<string> myArray = setParameters();
96                         
97                         OptionParser parser(option);
98                         map<string,string> parameters = parser.getParameters();
99                         
100                         ValidParameters validParameter;
101                         map<string,string>::iterator it;
102                         
103                         //check to make sure all parameters are valid for command
104                         for (it = parameters.begin(); it != parameters.end(); it++) { 
105                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
106                         }
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("taxonomy");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
119                                 }
120                                 
121                                 it = parameters.find("name");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
127                                 }
128                                 
129                                 it = parameters.find("group");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
135                                 }
136                                 
137                                 it = parameters.find("reftaxonomy");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
143                                 }
144                                 
145                 it = parameters.find("count");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
151                                 }
152
153                         }
154                         
155                         //initialize outputTypes
156                         vector<string> tempOutNames;
157                         outputTypes["summary"] = tempOutNames;
158                         
159                         //check for required parameters
160                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
161                         if (taxfile == "not open") { abort = true; }
162                         else if (taxfile == "not found") {                              
163                                 taxfile = m->getTaxonomyFile(); 
164                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
165                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
166                         }else { m->setTaxonomyFile(taxfile); }  
167                         
168                         namefile = validParameter.validFile(parameters, "name", true);
169                         if (namefile == "not open") { namefile = ""; abort = true; }
170                         else if (namefile == "not found") { namefile = "";  }   
171                         else { m->setNameFile(namefile); }
172                         
173                         groupfile = validParameter.validFile(parameters, "group", true);
174                         if (groupfile == "not open") { groupfile = ""; abort = true; }
175                         else if (groupfile == "not found") { groupfile = ""; }
176                         else { m->setGroupFile(groupfile); }
177             
178             countfile = validParameter.validFile(parameters, "count", true);
179                         if (countfile == "not open") { countfile = ""; abort = true; }
180                         else if (countfile == "not found") { countfile = "";  } 
181                         else { m->setCountTableFile(countfile); }
182             
183             if ((namefile != "") && (countfile != "")) {
184                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
185             }
186                         
187             if ((groupfile != "") && (countfile != "")) {
188                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
189             }
190             
191                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
192                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
193                         else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
194                         
195                         //if the user changes the output directory command factory will send this info to us in the output parameter 
196                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
197                                 outputDir = ""; 
198                                 outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
199                         }
200             
201             string temp = validParameter.validFile(parameters, "relabund", false);              if (temp == "not found"){       temp = "false";                 }
202                         relabund = m->isTrue(temp);
203                         
204             if (countfile == "") {
205                 if (namefile == "") {
206                     vector<string> files; files.push_back(taxfile);
207                     parser.getNameFile(files);
208                 }
209                         }
210                 }
211         }
212         catch(exception& e) {
213                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
214                 exit(1);
215         }
216 }
217 //***************************************************************************************************************
218
219 int SummaryTaxCommand::execute(){
220         try{
221                 
222                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
223                 int start = time(NULL);
224                 
225         GroupMap* groupMap = NULL;
226         CountTable* ct = NULL;
227         if (groupfile != "") {
228             groupMap = new GroupMap(groupfile);
229             groupMap->readMap();
230         }else if (countfile != "") {
231             ct = new CountTable();
232             ct->readTable(countfile, true, false);
233         }
234                 
235         PhyloSummary* taxaSum;
236         if (countfile != "") {
237             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct, relabund); }
238             else { taxaSum = new PhyloSummary(ct, relabund); }
239         }else {
240             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap, relabund); }
241             else { taxaSum = new PhyloSummary(groupMap, relabund); }
242                 }
243         
244                 if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
245                 
246                 int numSeqs = 0;
247                 if ((namefile == "") || (countfile != "")) { numSeqs = taxaSum->summarize(taxfile);  }
248                 else if (namefile != "") {
249                         map<string, vector<string> > nameMap;
250                         map<string, vector<string> >::iterator itNames;
251                         m->readNames(namefile, nameMap);
252                         
253                         if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
254                         
255                         ifstream in;
256                         m->openInputFile(taxfile, in);
257                         
258                         //read in users taxonomy file and add sequences to tree
259                         string name, taxon;
260                         
261                         while(!in.eof()){
262                 
263                 if (m->control_pressed) { break; }
264                 
265                                 in >> name >> taxon; m->gobble(in);
266                                 
267                                 itNames = nameMap.find(name);
268                                 
269                                 if (itNames == nameMap.end()) { 
270                                         m->mothurOut("[ERROR]: " + name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
271                                 }else{
272                                         for (int i = 0; i < itNames->second.size(); i++) { 
273                                                 numSeqs++;
274                                                 taxaSum->addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
275                                         }
276                                         itNames->second.clear();
277                                         nameMap.erase(itNames->first);
278                                 }
279                         }
280                         in.close();
281                 }else { numSeqs = taxaSum->summarize(taxfile);  }
282                 
283                 if (m->control_pressed) {  if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
284                 
285                 //print summary file
286                 ofstream outTaxTree;
287         map<string, string> variables; 
288                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(taxfile));
289                 string summaryFile = getOutputFileName("summary",variables);
290                 m->openOutputFile(summaryFile, outTaxTree);
291                 taxaSum->print(outTaxTree);
292                 outTaxTree.close();
293                 
294                 delete taxaSum;
295         if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; }
296                 
297                 if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
298                 
299                 m->mothurOutEndLine();
300                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
301                 m->mothurOutEndLine();
302                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
303                 m->mothurOut(summaryFile); m->mothurOutEndLine();       outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
304                 m->mothurOutEndLine();
305                                         
306                 return 0;
307         }
308         catch(exception& e) {
309                 m->errorOut(e, "SummaryTaxCommand", "execute");
310                 exit(1);
311         }
312 }
313 /**************************************************************************************/
314
315