5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarycommand.h"
15 #include "bootstrap.h"
17 #include "simpsoneven.h"
18 #include "invsimpson.h"
19 #include "npshannon.h"
22 #include "smithwilson.h"
23 #include "shannoneven.h"
24 #include "jackknife.h"
28 #include "bergerparker.h"
30 #include "goodscoverage.h"
37 //**********************************************************************************************************************
// Builds the list of parameter names accepted by the summary.single command:
// label, calc, abund, size, outputdir, groupmode and inputdir.
// The vector<string> return type suggests the list is handed back to the caller;
// the return statement itself is not visible in this excerpt.
38 vector<string> SummaryCommand::getValidParameters(){
40 string Array[] = {"label","calc","abund","size","outputdir","groupmode","inputdir"};
// sizeof(Array)/sizeof(string) is the element count, so the whole array is copied.
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Failures are reported through m->errorOut, tagged with the class and function name.
45 m->errorOut(e, "SummaryCommand", "getValidParameters");
49 //**********************************************************************************************************************
// Default constructor: registers the "summary" output type with an empty list
// of file names so that outputTypes always contains that key.
50 SummaryCommand::SummaryCommand(){
53 //initialize outputTypes
54 vector<string> tempOutNames;
55 outputTypes["summary"] = tempOutNames;
// Failures are reported through m->errorOut, tagged with the class and function name.
58 m->errorOut(e, "SummaryCommand", "SummaryCommand");
62 //**********************************************************************************************************************
// Required-parameter list for summary.single. The vector is declared empty and
// nothing is added to it in the visible code, which matches the help text
// ("No parameters are required").
63 vector<string> SummaryCommand::getRequiredParameters(){
65 vector<string> myArray;
// Failures are reported through m->errorOut, tagged with the class and function name.
69 m->errorOut(e, "SummaryCommand", "getRequiredParameters");
73 //**********************************************************************************************************************
// Builds the list of input file types this command works from:
// shared, list, rabund and sabund.
// NOTE(review): the trailing "or" entry presumably tells the caller that any ONE
// of the listed files is sufficient (the constructor only aborts when all four
// are missing) -- confirm against the code that consumes this list.
74 vector<string> SummaryCommand::getRequiredFiles(){
76 string AlignArray[] = {"shared","list","rabund","sabund","or"};
// sizeof(AlignArray)/sizeof(string) is the element count, so the whole array is copied.
77 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Failures are reported through m->errorOut, tagged with the class and function name.
81 m->errorOut(e, "SummaryCommand", "getRequiredFiles");
85 //**********************************************************************************************************************
// Constructs the command from the user's option string.
//   option - either "help" (print usage and abort) or the parameter string that
//            is handed to OptionParser.
// Sets `abort` when help was requested, when an unknown parameter is supplied,
// or when none of the shared/list/rabund/sabund files has been read yet.
// Otherwise fills in outputDir, label/labels/allLines, calc/Estimators, abund
// and groupMode from the parameters, applying the defaults shown below.
87 SummaryCommand::SummaryCommand(string option) {
89 globaldata = GlobalData::getInstance();
95 //allow user to run help
// help() prints the calculator list through validCalculator, so it is created
// just for that call and released immediately afterwards.
96 if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
99 //valid parameters for this command
100 string Array[] = {"label","calc","abund","size","outputdir","groupmode","inputdir"};
101 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
103 OptionParser parser(option);
104 map<string,string> parameters = parser.getParameters();
106 ValidParameters validParameter;
108 //check to make sure all parameters are valid for command
// Every supplied parameter is checked; one invalid name is enough to abort.
109 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
110 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
113 //initialize outputTypes
114 vector<string> tempOutNames;
115 outputTypes["summary"] = tempOutNames;
117 //make sure the user has already run the read.otu command
// Abort only when all four possible input files are unset.
118 if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the summary.single command."); m->mothurOutEndLine(); abort = true; }
120 //if the user changes the output directory command factory will send this info to us in the output parameter
121 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
123 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
126 //check for optional parameter and set defaults
127 // ...at some point should add some additional type checking...
128 label = validParameter.validFile(parameters, "label", false);
129 if (label == "not found") { label = ""; }
// "all" selects every label (allLines = 1); anything else is a dash-separated
// list that is split into `labels`.
131 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
132 else { allLines = 1; }
135 //if the user has not specified any labels use the ones from read.otu
137 allLines = globaldata->allLines;
138 labels = globaldata->labels;
// calc: both a missing value and the literal "default" map to the same
// dash-separated default estimator list.
141 calc = validParameter.validFile(parameters, "calc", false);
142 if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; }
144 if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; }
146 m->splitAtDash(calc, Estimators);
// abund defaults to "10" and is converted into the abund member.
149 temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; }
150 convert(temp, abund);
// size defaults to "0".
// NOTE(review): the conversion of temp into `size` is not visible in this excerpt -- confirm it exists.
152 temp = validParameter.validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; }
// groupmode defaults to "T" and is interpreted by m->isTrue.
155 temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
156 groupMode = m->isTrue(temp);
161 catch(exception& e) {
162 m->errorOut(e, "SummaryCommand", "SummaryCommand");
166 //**********************************************************************************************************************
// Prints the usage text for summary.single through m->mothurOut and lists the
// valid "summary" calculators on cout via validCalculator->printCalc.
// validCalculator is dereferenced here, so it must be allocated before the call;
// the option-string constructor does this right before invoking help().
// NOTE(review): the emitted text contains typos ("WTIH ONE EXECEPTION") and lists
// the parameters as "label, calc, abund and groupmode" although "size" is also
// accepted by this command -- the strings are runtime output and are left
// untouched here; consider correcting them in a behavior-changing edit.
168 void SummaryCommand::help(){
170 m->mothurOut("The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n");
171 m->mothurOut("The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n");
172 m->mothurOut("The summary.single command parameters are label, calc, abund and groupmode. No parameters are required.\n");
173 m->mothurOut("The summary.single command should be in the following format: \n");
174 m->mothurOut("summary.single(label=yourLabel, calc=yourEstimators).\n");
175 m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n");
// The calculator list goes to cout directly rather than through m->mothurOut.
176 validCalculator->printCalc("summary", cout);
177 m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n");
178 m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n");
179 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
180 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n");
182 catch(exception& e) {
183 m->errorOut(e, "SummaryCommand", "help");
188 //**********************************************************************************************************************
// Destructor: the body is empty, so no cleanup is performed here.
190 SummaryCommand::~SummaryCommand(){}
192 //**********************************************************************************************************************
// Runs summary.single.
// For each input file (a single file, or one .rabund file per group when a
// shared file was read) this builds the requested calculators, writes a
// tab-delimited "<root>summary" file with a header row and one row per processed
// label, and finally (shared input with groupMode set) combines the per-group
// files into one groups summary.
// Returns 0 on every visible return path, including abort and user cancellation.
// On cancellation (m->control_pressed) the files listed in outputNames are
// removed and the shared-file state in globaldata is restored.
194 int SummaryCommand::execute(){
197 if (abort == true) { return 0; }
// hadShared remembers the original shared file name so that globaldata can be
// put back into "sharedfile" mode on every exit path.
199 string hadShared = "";
200 if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); }
// Shared input: split it into per-group rabund files and process those instead.
201 else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); }
203 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; }
208 for (int p = 0; p < inputFileNames.size(); p++) {
// Output name: output directory + input file's base name (path removed) + "summary".
213 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary";
214 globaldata->inputFileName = inputFileNames[p];
215 outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot);
// With several input files, groups[p] names the group belonging to file p
// (groups is filled by parseSharedFile in the same order as the file names).
217 if (inputFileNames.size() > 1) {
218 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
// Calculators are rebuilt from scratch for every input file.
221 sumCalculators.clear();
223 validCalculator = new ValidCalculators();
// Map each valid estimator name to its calculator object. Ace takes abund,
// Efron/Boneh/Solow take size, and Shen takes both.
225 for (int i=0; i<Estimators.size(); i++) {
226 if (validCalculator->isValidCalculator("summary", Estimators[i]) == true) {
227 if(Estimators[i] == "sobs"){
228 sumCalculators.push_back(new Sobs());
229 }else if(Estimators[i] == "chao"){
230 sumCalculators.push_back(new Chao1());
231 }else if(Estimators[i] == "coverage"){
232 sumCalculators.push_back(new Coverage());
233 }else if(Estimators[i] == "geometric"){
234 sumCalculators.push_back(new Geom());
235 }else if(Estimators[i] == "logseries"){
236 sumCalculators.push_back(new LogSD());
237 }else if(Estimators[i] == "qstat"){
238 sumCalculators.push_back(new QStat());
239 }else if(Estimators[i] == "bergerparker"){
240 sumCalculators.push_back(new BergerParker());
241 }else if(Estimators[i] == "bstick"){
242 sumCalculators.push_back(new BStick());
243 }else if(Estimators[i] == "ace"){
246 sumCalculators.push_back(new Ace(abund));
247 }else if(Estimators[i] == "jack"){
248 sumCalculators.push_back(new Jackknife());
249 }else if(Estimators[i] == "shannon"){
250 sumCalculators.push_back(new Shannon());
251 }else if(Estimators[i] == "shannoneven"){
252 sumCalculators.push_back(new ShannonEven());
253 }else if(Estimators[i] == "npshannon"){
254 sumCalculators.push_back(new NPShannon());
255 }else if(Estimators[i] == "heip"){
256 sumCalculators.push_back(new Heip());
257 }else if(Estimators[i] == "smithwilson"){
258 sumCalculators.push_back(new SmithWilson());
259 }else if(Estimators[i] == "simpson"){
260 sumCalculators.push_back(new Simpson());
261 }else if(Estimators[i] == "simpsoneven"){
262 sumCalculators.push_back(new SimpsonEven());
263 }else if(Estimators[i] == "invsimpson"){
264 sumCalculators.push_back(new InvSimpson());
265 }else if(Estimators[i] == "bootstrap"){
266 sumCalculators.push_back(new Bootstrap());
267 }else if (Estimators[i] == "nseqs") {
268 sumCalculators.push_back(new NSeqs());
269 }else if (Estimators[i] == "goodscoverage") {
270 sumCalculators.push_back(new GoodsCoverage());
271 }else if (Estimators[i] == "efron") {
272 sumCalculators.push_back(new Efron(size));
273 }else if (Estimators[i] == "boneh") {
274 sumCalculators.push_back(new Boneh(size));
275 }else if (Estimators[i] == "solow") {
276 sumCalculators.push_back(new Solow(size));
277 }else if (Estimators[i] == "shen") {
278 sumCalculators.push_back(new Shen(size, abund));
283 //if the users entered no valid calculators don't execute command
// NOTE(review): this early return does not delete validCalculator (allocated
// above for this file) -- looks like a leak on this path; confirm.
284 if (sumCalculators.size() == 0) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; }
286 ofstream outputFileHandle;
287 m->openOutputFile(fileNameRoot, outputFileHandle);
// First header column; calculator columns are appended below.
288 outputFileHandle << "label";
// Read the current input file; sabund and input are then taken from globaldata
// (presumably populated by read->read -- confirm).
290 read = new ReadOTUFile(globaldata->inputFileName);
291 read->read(&*globaldata);
293 sabund = globaldata->sabund;
294 string lastLabel = sabund->getLabel();
295 input = globaldata->ginput;
// Header row: a calculator reporting one column gets just its name; the
// three-column form adds "_lci" and "_hci" columns after the name.
297 for(int i=0;i<sumCalculators.size();i++){
298 if(sumCalculators[i]->getCols() == 1){
299 outputFileHandle << '\t' << sumCalculators[i]->getName();
303 outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
307 outputFileHandle << endl;
309 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels tracks what has been written; userLabels is a working copy of
// the requested labels that shrinks as they are satisfied.
310 set<string> processedLabels;
311 set<string> userLabels = labels;
// Cancellation check: restore shared state, remove output files, free all
// objects owned by this run, and stop.
313 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
// Walk the file label by label until it ends or every requested label is handled.
315 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
317 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
// Case 1: the current label was requested (or all labels are wanted).
319 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
321 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
322 processedLabels.insert(sabund->getLabel());
323 userLabels.erase(sabund->getLabel());
// One output row: the label followed by each calculator's printed values.
325 outputFileHandle << sabund->getLabel();
326 for(int i=0;i<sumCalculators.size();i++){
// getValues is called before print; `data` itself is not used in the visible code.
327 vector<double> data = sumCalculators[i]->getValues(sabund);
329 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
331 outputFileHandle << '\t';
332 sumCalculators[i]->print(outputFileHandle);
334 outputFileHandle << endl;
// Case 2: fallback for a requested label that is absent from the file -- the
// previous label (lastLabel) is processed instead, provided it has not been
// written already. The exact test is delegated to m->anyLabelsToProcess.
338 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
// Remember the current label so it can be restored after re-reading lastLabel.
339 string saveLabel = sabund->getLabel();
342 sabund = input->getSAbundVector(lastLabel);
344 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
345 processedLabels.insert(sabund->getLabel());
346 userLabels.erase(sabund->getLabel());
348 outputFileHandle << sabund->getLabel();
349 for(int i=0;i<sumCalculators.size();i++){
350 vector<double> data = sumCalculators[i]->getValues(sabund);
352 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
354 outputFileHandle << '\t';
355 sumCalculators[i]->print(outputFileHandle);
357 outputFileHandle << endl;
360 //restore real lastlabel to save below
361 sabund->setLabel(saveLabel);
// Advance: remember this label and fetch the next vector from the input.
364 lastLabel = sabund->getLabel();
367 sabund = input->getSAbundVector();
// Cancellation after the loop: sabund is not deleted on this path.
370 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; return 0; }
372 //output error messages about any remaining user labels
373 set<string>::iterator it;
// NOTE(review): needToRun is only initialised in the visible code; it is
// presumably set to true in the "I will use" branch below -- confirm.
374 bool needToRun = false;
375 for (it = userLabels.begin(); it != userLabels.end(); it++) {
376 m->mothurOut("Your file does not include the label " + *it);
// If lastLabel has not been written yet it will stand in for the missing label;
// otherwise the user is pointed at the already-written lastLabel row.
377 if (processedLabels.count(lastLabel) != 1) {
378 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
381 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
385 //run last label if you need to
386 if (needToRun == true) {
// Drop whatever vector is still held, then re-read the one for lastLabel.
387 if (sabund != NULL) { delete sabund; }
388 sabund = input->getSAbundVector(lastLabel);
390 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
391 outputFileHandle << sabund->getLabel();
392 for(int i=0;i<sumCalculators.size();i++){
393 vector<double> data = sumCalculators[i]->getValues(sabund);
395 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0; }
397 outputFileHandle << '\t';
398 sumCalculators[i]->print(outputFileHandle);
400 outputFileHandle << endl;
405 outputFileHandle.close();
407 if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; return 0; }
// Normal per-file cleanup; the globaldata pointers are cleared so they do not
// dangle after the objects are freed.
410 delete input; globaldata->ginput = NULL;
412 delete validCalculator;
413 globaldata->sabund = NULL;
414 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
// All files processed: put globaldata back into shared-file mode if needed.
417 if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); }
419 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
421 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
// NOTE(review): numLines and numCols are not assigned in the visible code --
// confirm where they are computed.
422 if ((hadShared != "") && (groupMode)) { outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames)); }
424 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// Report every file that was produced.
426 m->mothurOutEndLine();
427 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
428 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
429 m->mothurOutEndLine();
433 catch(exception& e) {
434 m->errorOut(e, "SummaryCommand", "execute");
438 //**********************************************************************************************************************
// Splits a shared file into one "<root><group>.rabund" file per group.
//   filename - the shared file to read.
// Collects the generated file names in `filenames` (presumably the value
// returned; the return statement is not visible in this excerpt) and appends
// each group name to the `groups` member in the same order.
// Side effects: sets the `read` and `input` members and clears
// globaldata->ginput at the end.
439 vector<string> SummaryCommand::parseSharedFile(string filename) {
441 vector<string> filenames;
// One output stream per group, keyed by group name.
443 map<string, ofstream*> filehandles;
444 map<string, ofstream*>::iterator it3;
448 read = new ReadOTUFile(filename);
449 read->read(&*globaldata);
451 input = globaldata->ginput;
452 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
454 string sharedFileRoot = m->getRootName(filename);
456 //clears file before we start to write to it below
// Files are later opened in append mode, so stale copies must be removed first.
457 for (int i=0; i<lookup.size(); i++) {
458 remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
459 filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
// Register a stream and the group name for every group in the first set of vectors.
463 for (int i=0; i<lookup.size(); i++) {
465 filehandles[lookup[i]->getGroup()] = temp;
466 groups.push_back(lookup[i]->getGroup());
// Process one set of vectors per iteration until the input is exhausted.
// NOTE(review): lookup[0] assumes the shared file has at least one group -- confirm.
469 while(lookup[0] != NULL) {
// Append this group's rabund vector to its file; the stream is opened and
// closed around every write.
471 for (int i = 0; i < lookup.size(); i++) {
472 RAbundVector rav = lookup[i]->getRAbundVector();
473 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
474 rav.print(*(filehandles[lookup[i]->getGroup()]));
475 (*(filehandles[lookup[i]->getGroup()])).close();
// Release the current vectors before fetching the next set.
478 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
479 lookup = input->getSharedRAbundVectors();
// Final pass over the per-group streams (loop body not visible in this excerpt).
483 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
488 globaldata->ginput = NULL;
492 catch(exception& e) {
493 m->errorOut(e, "SummaryCommand", "parseSharedFile");
497 //**********************************************************************************************************************
// Combines the per-group summary files into a single "<root>groups.summary" file.
//   numLines    - number of data rows taken from each summary file.
//   numCols     - number of value columns; numCols+1 tokens are read per row
//                 (the extra one being the leading label column).
//   outputNames - the per-group summary files to combine, in group order.
// A "group" column is inserted after the first column of every row.
// Returns the name of the combined file.
498 string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames) {
502 string combineFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "groups.summary";
505 m->openOutputFile(combineFileName, out);
507 //open each groups summary file
// newLabel accumulates the combined header row; files maps each summary file
// name to its rewritten data rows.
508 string newLabel = "";
509 map<string, vector<string> > files;
510 for (int i=0; i<outputNames.size(); i++) {
511 vector<string> thisFilesLines;
514 m->openInputFile(outputNames[i], temp);
516 //read through first line - labels
518 if (i == 0) { //we want to save the labels to output below
519 for (int j = 0; j < numCols+1; j++) {
// The "group" header goes in front of the second token (j == 1), i.e. right
// after the first column.
522 if (j == 1) { newLabel += "group\t" + tempLabel + '\t';
523 }else{ newLabel += tempLabel + '\t'; }
// For the remaining files the header tokens are read and discarded.
525 }else{ for (int j = 0; j < numCols+1; j++) { temp >> tempLabel; } }
// Rebuild every data row with the group name inserted after the first column.
530 for (int k = 0; k < numLines; k++) {
532 string thisLine = "";
535 for (int j = 0; j < numCols+1; j++) {
// NOTE(review): groups[i] assumes outputNames[i] belongs to groups[i] -- confirm
// both containers are filled in the same order and have the same length.
539 if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; }
540 else{ thisLine += tempLabel + "\t"; }
545 thisFilesLines.push_back(thisLine);
550 files[outputNames[i]] = thisFilesLines;
555 //output label line to new file
556 out << newLabel << endl;
// Interleave the files row by row: for each row index, emit that row from every
// group in turn.
// NOTE(review): indexing [k] assumes every file contributed numLines rows -- confirm.
559 for (int k = 0; k < numLines; k++) {
561 //grab summary data for each group
562 for (int i=0; i<outputNames.size(); i++) {
563 out << files[outputNames[i]][k];
571 //return combine file name
572 return combineFileName;
575 catch(exception& e) {
576 m->errorOut(e, "SummaryCommand", "createGroupSummaryFile");
580 //**********************************************************************************************************************