5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarycommand.h"
15 #include "bootstrap.h"
17 #include "simpsoneven.h"
18 #include "invsimpson.h"
19 #include "npshannon.h"
22 #include "smithwilson.h"
23 #include "shannoneven.h"
24 #include "jackknife.h"
28 #include "bergerparker.h"
30 #include "goodscoverage.h"
37 //**********************************************************************************************************************
39 SummaryCommand::SummaryCommand(string option) {
41 globaldata = GlobalData::getInstance();
47 //allow user to run help
48 if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
51 //valid paramters for this command
52 string Array[] = {"label","calc","abund","size","outputdir","groupmode","inputdir"};
53 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55 OptionParser parser(option);
56 map<string,string> parameters = parser.getParameters();
58 ValidParameters validParameter;
60 //check to make sure all parameters are valid for command
61 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
62 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
65 //make sure the user has already run the read.otu command
66 if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the summary.single command."); m->mothurOutEndLine(); abort = true; }
68 //if the user changes the output directory command factory will send this info to us in the output parameter
69 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
71 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
74 //check for optional parameter and set defaults
75 // ...at some point should added some additional type checking...
76 label = validParameter.validFile(parameters, "label", false);
77 if (label == "not found") { label = ""; }
79 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
80 else { allLines = 1; }
83 //if the user has not specified any labels use the ones from read.otu
85 allLines = globaldata->allLines;
86 labels = globaldata->labels;
89 calc = validParameter.validFile(parameters, "calc", false);
90 if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; }
92 if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; }
94 m->splitAtDash(calc, Estimators);
97 temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; }
100 temp = validParameter.validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; }
103 temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "F"; }
104 groupMode = m->isTrue(temp);
109 catch(exception& e) {
110 m->errorOut(e, "SummaryCommand", "SummaryCommand");
114 //**********************************************************************************************************************
116 void SummaryCommand::help(){
118 m->mothurOut("The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n");
119 m->mothurOut("The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n");
120 m->mothurOut("The summary.single command parameters are label, calc, abund and groupmode. No parameters are required.\n");
121 m->mothurOut("The summary.single command should be in the following format: \n");
122 m->mothurOut("summary.single(label=yourLabel, calc=yourEstimators).\n");
123 m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n");
124 validCalculator->printCalc("summary", cout);
125 m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n");
126 m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=False).\n");
127 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
128 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n");
130 catch(exception& e) {
131 m->errorOut(e, "SummaryCommand", "help");
136 //**********************************************************************************************************************
138 SummaryCommand::~SummaryCommand(){}
140 //**********************************************************************************************************************
142 int SummaryCommand::execute(){
145 if (abort == true) { return 0; }
147 vector<string> outputNames;
149 string hadShared = "";
150 if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); }
151 else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); }
153 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; }
158 for (int p = 0; p < inputFileNames.size(); p++) {
163 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary";
164 globaldata->inputFileName = inputFileNames[p];
165 outputNames.push_back(fileNameRoot);
167 if (inputFileNames.size() > 1) {
168 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
171 sumCalculators.clear();
173 validCalculator = new ValidCalculators();
175 for (int i=0; i<Estimators.size(); i++) {
176 if (validCalculator->isValidCalculator("summary", Estimators[i]) == true) {
177 if(Estimators[i] == "sobs"){
178 sumCalculators.push_back(new Sobs());
179 }else if(Estimators[i] == "chao"){
180 sumCalculators.push_back(new Chao1());
181 }else if(Estimators[i] == "coverage"){
182 sumCalculators.push_back(new Coverage());
183 }else if(Estimators[i] == "geometric"){
184 sumCalculators.push_back(new Geom());
185 }else if(Estimators[i] == "logseries"){
186 sumCalculators.push_back(new LogSD());
187 }else if(Estimators[i] == "qstat"){
188 sumCalculators.push_back(new QStat());
189 }else if(Estimators[i] == "bergerparker"){
190 sumCalculators.push_back(new BergerParker());
191 }else if(Estimators[i] == "bstick"){
192 sumCalculators.push_back(new BStick());
193 }else if(Estimators[i] == "ace"){
196 sumCalculators.push_back(new Ace(abund));
197 }else if(Estimators[i] == "jack"){
198 sumCalculators.push_back(new Jackknife());
199 }else if(Estimators[i] == "shannon"){
200 sumCalculators.push_back(new Shannon());
201 }else if(Estimators[i] == "shannoneven"){
202 sumCalculators.push_back(new ShannonEven());
203 }else if(Estimators[i] == "npshannon"){
204 sumCalculators.push_back(new NPShannon());
205 }else if(Estimators[i] == "heip"){
206 sumCalculators.push_back(new Heip());
207 }else if(Estimators[i] == "smithwilson"){
208 sumCalculators.push_back(new SmithWilson());
209 }else if(Estimators[i] == "simpson"){
210 sumCalculators.push_back(new Simpson());
211 }else if(Estimators[i] == "simpsoneven"){
212 sumCalculators.push_back(new SimpsonEven());
213 }else if(Estimators[i] == "invsimpson"){
214 sumCalculators.push_back(new InvSimpson());
215 }else if(Estimators[i] == "bootstrap"){
216 sumCalculators.push_back(new Bootstrap());
217 }else if (Estimators[i] == "nseqs") {
218 sumCalculators.push_back(new NSeqs());
219 }else if (Estimators[i] == "goodscoverage") {
220 sumCalculators.push_back(new GoodsCoverage());
221 }else if (Estimators[i] == "efron") {
222 sumCalculators.push_back(new Efron(size));
223 }else if (Estimators[i] == "boneh") {
224 sumCalculators.push_back(new Boneh(size));
225 }else if (Estimators[i] == "solow") {
226 sumCalculators.push_back(new Solow(size));
227 }else if (Estimators[i] == "shen") {
228 sumCalculators.push_back(new Shen(size, abund));
233 //if the users entered no valid calculators don't execute command
234 if (sumCalculators.size() == 0) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; }
236 ofstream outputFileHandle;
237 m->openOutputFile(fileNameRoot, outputFileHandle);
238 outputFileHandle << "label";
240 read = new ReadOTUFile(globaldata->inputFileName);
241 read->read(&*globaldata);
243 sabund = globaldata->sabund;
244 string lastLabel = sabund->getLabel();
245 input = globaldata->ginput;
247 for(int i=0;i<sumCalculators.size();i++){
248 if(sumCalculators[i]->getCols() == 1){
249 outputFileHandle << '\t' << sumCalculators[i]->getName();
253 outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
257 outputFileHandle << endl;
259 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
260 set<string> processedLabels;
261 set<string> userLabels = labels;
263 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
265 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
267 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
269 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
271 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
272 processedLabels.insert(sabund->getLabel());
273 userLabels.erase(sabund->getLabel());
275 outputFileHandle << sabund->getLabel();
276 for(int i=0;i<sumCalculators.size();i++){
277 vector<double> data = sumCalculators[i]->getValues(sabund);
279 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
281 outputFileHandle << '\t';
282 sumCalculators[i]->print(outputFileHandle);
284 outputFileHandle << endl;
288 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
289 string saveLabel = sabund->getLabel();
292 sabund = input->getSAbundVector(lastLabel);
294 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
295 processedLabels.insert(sabund->getLabel());
296 userLabels.erase(sabund->getLabel());
298 outputFileHandle << sabund->getLabel();
299 for(int i=0;i<sumCalculators.size();i++){
300 vector<double> data = sumCalculators[i]->getValues(sabund);
302 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
304 outputFileHandle << '\t';
305 sumCalculators[i]->print(outputFileHandle);
307 outputFileHandle << endl;
310 //restore real lastlabel to save below
311 sabund->setLabel(saveLabel);
314 lastLabel = sabund->getLabel();
317 sabund = input->getSAbundVector();
320 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; return 0; }
322 //output error messages about any remaining user labels
323 set<string>::iterator it;
324 bool needToRun = false;
325 for (it = userLabels.begin(); it != userLabels.end(); it++) {
326 m->mothurOut("Your file does not include the label " + *it);
327 if (processedLabels.count(lastLabel) != 1) {
328 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
331 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
335 //run last label if you need to
336 if (needToRun == true) {
337 if (sabund != NULL) { delete sabund; }
338 sabund = input->getSAbundVector(lastLabel);
340 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
341 outputFileHandle << sabund->getLabel();
342 for(int i=0;i<sumCalculators.size();i++){
343 vector<double> data = sumCalculators[i]->getValues(sabund);
345 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
347 outputFileHandle << '\t';
348 sumCalculators[i]->print(outputFileHandle);
350 outputFileHandle << endl;
355 outputFileHandle.close();
357 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; return 0; }
360 delete input; globaldata->ginput = NULL;
362 delete validCalculator;
363 globaldata->sabund = NULL;
364 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
367 if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); }
369 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
371 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
372 if ((hadShared != "") && (groupMode)) { outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames)); }
374 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
376 m->mothurOutEndLine();
377 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
378 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
379 m->mothurOutEndLine();
383 catch(exception& e) {
384 m->errorOut(e, "SummaryCommand", "execute");
388 //**********************************************************************************************************************
389 vector<string> SummaryCommand::parseSharedFile(string filename) {
391 vector<string> filenames;
393 map<string, ofstream*> filehandles;
394 map<string, ofstream*>::iterator it3;
398 read = new ReadOTUFile(filename);
399 read->read(&*globaldata);
401 input = globaldata->ginput;
402 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
404 string sharedFileRoot = m->getRootName(filename);
406 //clears file before we start to write to it below
407 for (int i=0; i<lookup.size(); i++) {
408 remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
409 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
413 for (int i=0; i<lookup.size(); i++) {
415 filehandles[lookup[i]->getGroup()] = temp;
416 groups.push_back(lookup[i]->getGroup());
419 while(lookup[0] != NULL) {
421 for (int i = 0; i < lookup.size(); i++) {
422 RAbundVector rav = lookup[i]->getRAbundVector();
423 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
424 rav.print(*(filehandles[lookup[i]->getGroup()]));
425 (*(filehandles[lookup[i]->getGroup()])).close();
428 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
429 lookup = input->getSharedRAbundVectors();
433 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
438 globaldata->ginput = NULL;
442 catch(exception& e) {
443 m->errorOut(e, "SummaryCommand", "parseSharedFile");
447 //**********************************************************************************************************************
448 string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string> outputNames) {
452 string combineFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "groups.summary";
455 m->openOutputFile(combineFileName, out);
457 //open each groups summary file
458 string newLabel = "";
460 map<string, ifstream*> filehandles;
461 for (int i=0; i<outputNames.size(); i++) {
463 filehandles[outputNames[i]] = temp;
464 m->openInputFile(outputNames[i], *(temp));
466 //read through first line - labels
468 if (i == 0) { //we want to save the labels to output below
469 for (int j = 0; j < numCols+1; j++) {
470 *(temp) >> tempLabel;
472 if (j == 1) { newLabel += "group\t" + tempLabel + '\t';
473 }else{ newLabel += tempLabel + '\t'; }
475 }else{ for (int j = 0; j < numCols+1; j++) { *(temp) >> tempLabel; } }
480 //output label line to new file
481 out << newLabel << endl;
484 for (int i = 0; i < numLines; i++) {
486 //grab summary data for each group
487 for (int i=0; i<outputNames.size(); i++) {
490 for (int j = 0; j < numCols+1; j++) {
491 *(filehandles[outputNames[i]]) >> tempLabel;
493 //print to combined file
494 if (j == 1) { out << groups[i] << '\t' << tempLabel << '\t'; }
495 else{ out << tempLabel << '\t'; }
499 m->gobble(*(filehandles[outputNames[i]]));
503 //close each groups summary file
504 for (int i=0; i<outputNames.size(); i++) { (*(filehandles[outputNames[i]])).close(); }
507 //return combine file name
508 return combineFileName;
511 catch(exception& e) {
512 m->errorOut(e, "SummaryCommand", "createGroupSummaryFile");
516 //**********************************************************************************************************************