]> git.donarmstrong.com Git - mothur.git/blob - summarytaxcommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / summarytaxcommand.cpp
1 /*
2  *  summarytaxcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/23/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "summarytaxcommand.h"
11 #include "phylosummary.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummaryTaxCommand::setParameters(){      
15         try {
16                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(ptaxonomy);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
20                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "SummaryTaxCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string SummaryTaxCommand::getHelpString(){      
35         try {
36                 string helpString = "";
37                 helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n";
38                 helpString += "The summary.tax command parameters are taxonomy, count, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
39                 helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
40                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
41         helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
42                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
43                 helpString += "The summary.tax command should be in the following format: \n";
44                 helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n";
45                 helpString += "Note: No spaces between parameter labels (i.e. taxonomy), '=' and parameters (i.e.yourTaxonomyFile).\n"; 
46                 return helpString;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "SummaryTaxCommand", "getHelpString");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 string SummaryTaxCommand::getOutputPattern(string type) {
55     try {
56         string pattern = "";
57         
58         if (type == "summary") {  pattern = "[filename],tax.summary"; } 
59         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
60         
61         return pattern;
62     }
63     catch(exception& e) {
64         m->errorOut(e, "SummaryTaxCommand", "getOutputPattern");
65         exit(1);
66     }
67 }
68 //**********************************************************************************************************************
69 SummaryTaxCommand::SummaryTaxCommand(){ 
70         try {
71                 abort = true; calledHelp = true; 
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["summary"] = tempOutNames;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
78                 exit(1);
79         }
80 }
81 //***************************************************************************************************************
82
83 SummaryTaxCommand::SummaryTaxCommand(string option)  {
84         try {
85                 abort = false; calledHelp = false;   
86                 
87                 //allow user to run help
88                 if(option == "help") { help(); abort = true; calledHelp = true; }
89                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90                 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string,string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string,string>::iterator it;
99                         
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //if the user changes the input directory command factory will send this info to us in the output parameter 
106                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
107                         if (inputDir == "not found"){   inputDir = "";          }
108                         else {
109                                 string path;
110                                 it = parameters.find("taxonomy");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
116                                 }
117                                 
118                                 it = parameters.find("name");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
124                                 }
125                                 
126                                 it = parameters.find("group");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
132                                 }
133                                 
134                                 it = parameters.find("reftaxonomy");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
140                                 }
141                                 
142                 it = parameters.find("count");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
148                                 }
149
150                         }
151                         
152                         //initialize outputTypes
153                         vector<string> tempOutNames;
154                         outputTypes["summary"] = tempOutNames;
155                         
156                         //check for required parameters
157                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
158                         if (taxfile == "not open") { abort = true; }
159                         else if (taxfile == "not found") {                              
160                                 taxfile = m->getTaxonomyFile(); 
161                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
162                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
163                         }else { m->setTaxonomyFile(taxfile); }  
164                         
165                         namefile = validParameter.validFile(parameters, "name", true);
166                         if (namefile == "not open") { namefile = ""; abort = true; }
167                         else if (namefile == "not found") { namefile = "";  }   
168                         else { m->setNameFile(namefile); }
169                         
170                         groupfile = validParameter.validFile(parameters, "group", true);
171                         if (groupfile == "not open") { groupfile = ""; abort = true; }
172                         else if (groupfile == "not found") { groupfile = ""; }
173                         else { m->setGroupFile(groupfile); }
174             
175             countfile = validParameter.validFile(parameters, "count", true);
176                         if (countfile == "not open") { countfile = ""; abort = true; }
177                         else if (countfile == "not found") { countfile = "";  } 
178                         else { m->setCountTableFile(countfile); }
179             
180             if ((namefile != "") && (countfile != "")) {
181                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
182             }
183                         
184             if ((groupfile != "") && (countfile != "")) {
185                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
186             }
187             
188                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
189                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
190                         else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
191                         
192                         //if the user changes the output directory command factory will send this info to us in the output parameter 
193                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
194                                 outputDir = ""; 
195                                 outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
196                         }
197                         
198             if (countfile == "") {
199                 if (namefile == "") {
200                     vector<string> files; files.push_back(taxfile);
201                     parser.getNameFile(files);
202                 }
203                         }
204                 }
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand");
208                 exit(1);
209         }
210 }
211 //***************************************************************************************************************
212
213 int SummaryTaxCommand::execute(){
214         try{
215                 
216                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
217                 int start = time(NULL);
218                 
219         GroupMap* groupMap = NULL;
220         CountTable* ct = NULL;
221         if (groupfile != "") {
222             groupMap = new GroupMap(groupfile);
223             groupMap->readMap();
224         }else if (countfile != "") {
225             ct = new CountTable();
226             ct->readTable(countfile, true);
227         }
228                 
229         PhyloSummary* taxaSum;
230         if (countfile != "") {
231             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
232             else { taxaSum = new PhyloSummary(ct); }
233         }else {
234             if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
235             else { taxaSum = new PhyloSummary(groupMap); }
236                 }
237         
238                 if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
239                 
240                 int numSeqs = 0;
241                 if ((namefile == "") || (countfile != "")) { numSeqs = taxaSum->summarize(taxfile);  }
242                 else if (namefile != "") {
243                         map<string, vector<string> > nameMap;
244                         map<string, vector<string> >::iterator itNames;
245                         m->readNames(namefile, nameMap);
246                         
247                         if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
248                         
249                         ifstream in;
250                         m->openInputFile(taxfile, in);
251                         
252                         //read in users taxonomy file and add sequences to tree
253                         string name, taxon;
254                         
255                         while(!in.eof()){
256                 
257                 if (m->control_pressed) { break; }
258                 
259                                 in >> name >> taxon; m->gobble(in);
260                                 
261                                 itNames = nameMap.find(name);
262                                 
263                                 if (itNames == nameMap.end()) { 
264                                         m->mothurOut("[ERROR]: " + name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
265                                 }else{
266                                         for (int i = 0; i < itNames->second.size(); i++) { 
267                                                 numSeqs++;
268                                                 taxaSum->addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
269                                         }
270                                         itNames->second.clear();
271                                         nameMap.erase(itNames->first);
272                                 }
273                         }
274                         in.close();
275                 }else { numSeqs = taxaSum->summarize(taxfile);  }
276                 
277                 if (m->control_pressed) {  if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
278                 
279                 //print summary file
280                 ofstream outTaxTree;
281         map<string, string> variables; 
282                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(taxfile));
283                 string summaryFile = getOutputFileName("summary",variables);
284                 m->openOutputFile(summaryFile, outTaxTree);
285                 taxaSum->print(outTaxTree);
286                 outTaxTree.close();
287                 
288                 delete taxaSum;
289         if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; }
290                 
291                 if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
292                 
293                 m->mothurOutEndLine();
294                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
295                 m->mothurOutEndLine();
296                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
297                 m->mothurOut(summaryFile); m->mothurOutEndLine();       outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
298                 m->mothurOutEndLine();
299                                         
300                 return 0;
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "SummaryTaxCommand", "execute");
304                 exit(1);
305         }
306 }
307 /**************************************************************************************/
308
309