2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
38 SummarySharedCommand::SummarySharedCommand(string option) {
40 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50 //valid paramters for this command
51 string Array[] = {"label","calc","groups","all","outputdir","inputdir", "processors"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
59 //check to make sure all parameters are valid for command
60 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
61 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
64 //make sure the user has already run the read.otu command
65 if (globaldata->getSharedFile() == "") {
66 m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true;
69 //if the user changes the output directory command factory will send this info to us in the output parameter
70 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
72 outputDir += m->hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it
75 //check for optional parameter and set defaults
76 // ...at some point should added some additional type checking...
77 label = validParameter.validFile(parameters, "label", false);
78 if (label == "not found") { label = ""; }
80 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
81 else { allLines = 1; }
84 //if the user has not specified any labels use the ones from read.otu
86 allLines = globaldata->allLines;
87 labels = globaldata->labels;
90 calc = validParameter.validFile(parameters, "calc", false);
91 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
93 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 m->splitAtDash(calc, Estimators);
97 groups = validParameter.validFile(parameters, "groups", false);
98 if (groups == "not found") { groups = ""; }
100 m->splitAtDash(groups, Groups);
101 globaldata->Groups = Groups;
104 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
105 all = m->isTrue(temp);
107 temp = validParameter.validFile(parameters, "processors", false); if(temp == "not found"){ temp = "1"; }
108 convert(temp, processors);
110 if (abort == false) {
112 validCalculator = new ValidCalculators();
115 for (i=0; i<Estimators.size(); i++) {
116 if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) {
117 if (Estimators[i] == "sharedsobs") {
118 sumCalculators.push_back(new SharedSobsCS());
119 }else if (Estimators[i] == "sharedchao") {
120 sumCalculators.push_back(new SharedChao1());
121 }else if (Estimators[i] == "sharedace") {
122 sumCalculators.push_back(new SharedAce());
123 }else if (Estimators[i] == "jabund") {
124 sumCalculators.push_back(new JAbund());
125 }else if (Estimators[i] == "sorabund") {
126 sumCalculators.push_back(new SorAbund());
127 }else if (Estimators[i] == "jclass") {
128 sumCalculators.push_back(new Jclass());
129 }else if (Estimators[i] == "sorclass") {
130 sumCalculators.push_back(new SorClass());
131 }else if (Estimators[i] == "jest") {
132 sumCalculators.push_back(new Jest());
133 }else if (Estimators[i] == "sorest") {
134 sumCalculators.push_back(new SorEst());
135 }else if (Estimators[i] == "thetayc") {
136 sumCalculators.push_back(new ThetaYC());
137 }else if (Estimators[i] == "thetan") {
138 sumCalculators.push_back(new ThetaN());
139 }else if (Estimators[i] == "kstest") {
140 sumCalculators.push_back(new KSTest());
141 }else if (Estimators[i] == "sharednseqs") {
142 sumCalculators.push_back(new SharedNSeqs());
143 }else if (Estimators[i] == "ochiai") {
144 sumCalculators.push_back(new Ochiai());
145 }else if (Estimators[i] == "anderberg") {
146 sumCalculators.push_back(new Anderberg());
147 }else if (Estimators[i] == "kulczynski") {
148 sumCalculators.push_back(new Kulczynski());
149 }else if (Estimators[i] == "kulczynskicody") {
150 sumCalculators.push_back(new KulczynskiCody());
151 }else if (Estimators[i] == "lennon") {
152 sumCalculators.push_back(new Lennon());
153 }else if (Estimators[i] == "morisitahorn") {
154 sumCalculators.push_back(new MorHorn());
155 }else if (Estimators[i] == "braycurtis") {
156 sumCalculators.push_back(new BrayCurtis());
157 }else if (Estimators[i] == "whittaker") {
158 sumCalculators.push_back(new Whittaker());
167 catch(exception& e) {
168 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
173 //**********************************************************************************************************************
175 void SummarySharedCommand::help(){
177 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
178 m->mothurOut("The summary.shared command parameters are label, calc and all. No parameters are required.\n");
179 m->mothurOut("The summary.shared command should be in the following format: \n");
180 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
181 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
182 validCalculator->printCalc("sharedsummary", cout);
183 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
184 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
185 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
186 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
187 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
188 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
189 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
191 catch(exception& e) {
192 m->errorOut(e, "SummarySharedCommand", "help");
197 //**********************************************************************************************************************
199 SummarySharedCommand::~SummarySharedCommand(){
200 if (abort == false) {
202 delete validCalculator;
206 //**********************************************************************************************************************
208 int SummarySharedCommand::execute(){
211 if (abort == true) { return 0; }
213 ofstream outputFileHandle, outAll;
214 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
216 //if the users entered no valid calculators don't execute command
217 if (sumCalculators.size() == 0) { return 0; }
218 //check if any calcs can do multiples
221 for (int i = 0; i < sumCalculators.size(); i++) {
222 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
228 read = new ReadOTUFile(globaldata->inputFileName);
229 read->read(&*globaldata);
231 input = globaldata->ginput;
232 lookup = input->getSharedRAbundVectors();
233 string lastLabel = lookup[0]->getLabel();
235 /******************************************************/
236 //output headings for files
237 /******************************************************/
238 //output estimator names as column headers
239 m->openOutputFile(outputFileName, outputFileHandle);
240 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
241 for(int i=0;i<sumCalculators.size();i++){
242 outputFileHandle << '\t' << sumCalculators[i]->getName();
243 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
245 outputFileHandle << endl;
246 outputFileHandle.close();
248 //create file and put column headers for multiple groups file
249 string outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
251 m->openOutputFile(outAllFileName, outAll);
252 outputNames.push_back(outAllFileName);
254 outAll << "label" <<'\t' << "comparison" << '\t';
255 for(int i=0;i<sumCalculators.size();i++){
256 if (sumCalculators[i]->getMultiple() == true) {
257 outAll << '\t' << sumCalculators[i]->getName();
264 if (lookup.size() < 2) {
265 m->mothurOut("I cannot run the command without at least 2 valid groups.");
266 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
268 //close files and clean up
269 remove(outputFileName.c_str());
270 if (mult == true) { remove(outAllFileName.c_str()); }
272 //if you only have 2 groups you don't need a .sharedmultiple file
273 }else if ((lookup.size() == 2) && (mult == true)) {
275 remove(outAllFileName.c_str());
276 outputNames.pop_back();
279 if (m->control_pressed) {
280 if (mult) { remove(outAllFileName.c_str()); }
281 remove(outputFileName.c_str());
282 delete input; globaldata->ginput = NULL;
283 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
284 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
285 globaldata->Groups.clear();
288 /******************************************************/
291 /******************************************************/
292 //comparison breakup to be used by different processes later
293 numGroups = globaldata->Groups.size();
294 lines.resize(processors);
295 for (int i = 0; i < processors; i++) {
296 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
297 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
299 /******************************************************/
301 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
302 set<string> processedLabels;
303 set<string> userLabels = labels;
305 //as long as you are not at the end of the file or done wih the lines you want
306 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
307 if (m->control_pressed) {
308 if (mult) { remove(outAllFileName.c_str()); }
309 remove(outputFileName.c_str());
310 delete input; globaldata->ginput = NULL;
311 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
312 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
313 globaldata->Groups.clear();
318 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
319 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
320 process(lookup, outputFileName, outAllFileName);
322 processedLabels.insert(lookup[0]->getLabel());
323 userLabels.erase(lookup[0]->getLabel());
326 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
327 string saveLabel = lookup[0]->getLabel();
329 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
330 lookup = input->getSharedRAbundVectors(lastLabel);
332 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
333 process(lookup, outputFileName, outAllFileName);
335 processedLabels.insert(lookup[0]->getLabel());
336 userLabels.erase(lookup[0]->getLabel());
338 //restore real lastlabel to save below
339 lookup[0]->setLabel(saveLabel);
342 lastLabel = lookup[0]->getLabel();
344 //get next line to process
345 //prevent memory leak
346 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
347 lookup = input->getSharedRAbundVectors();
350 if (m->control_pressed) {
351 if (mult) { remove(outAllFileName.c_str()); }
352 remove(outputFileName.c_str());
353 delete input; globaldata->ginput = NULL;
354 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
355 globaldata->Groups.clear();
359 //output error messages about any remaining user labels
360 set<string>::iterator it;
361 bool needToRun = false;
362 for (it = userLabels.begin(); it != userLabels.end(); it++) {
363 m->mothurOut("Your file does not include the label " + *it);
364 if (processedLabels.count(lastLabel) != 1) {
365 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
368 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
372 //run last label if you need to
373 if (needToRun == true) {
374 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
375 lookup = input->getSharedRAbundVectors(lastLabel);
377 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
378 process(lookup, outputFileName, outAllFileName);
379 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
383 //reset groups parameter
384 globaldata->Groups.clear();
386 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
387 delete input; globaldata->ginput = NULL;
389 if (m->control_pressed) {
390 remove(outAllFileName.c_str());
391 remove(outputFileName.c_str());
395 m->mothurOutEndLine();
396 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
397 m->mothurOut(outputFileName); m->mothurOutEndLine();
398 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); }
399 m->mothurOutEndLine();
403 catch(exception& e) {
404 m->errorOut(e, "SummarySharedCommand", "execute");
409 /***********************************************************/
410 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
413 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
415 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp");
416 m->appendFiles((sumFileName + ".temp"), sumFileName);
417 remove((sumFileName + ".temp").c_str());
419 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
420 remove((sumAllFileName + ".temp").c_str());
424 vector<int> processIDS;
426 //loop through and create all the processes you want
427 while (process != processors) {
431 processIDS.push_back(pid);
434 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp");
436 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
439 //force parent to wait until all the processes are done
440 for (int i = 0; i < processIDS.size(); i++) {
441 int temp = processIDS[i];
445 for (int i = 0; i < processIDS.size(); i++) {
446 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
447 remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
449 if (i == 0) { m->appendFiles((sumAllFileName + toString(processIDS[i]) + ".temp"), sumAllFileName); }
450 remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());
456 driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"));
457 m->appendFiles((sumFileName + ".temp"), sumFileName);
458 remove((sumFileName + ".temp").c_str());
460 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
461 remove((sumAllFileName + ".temp").c_str());
465 catch(exception& e) {
466 m->errorOut(e, "SummarySharedCommand", "process");
470 /**************************************************************************************************/
471 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile) {
474 //loop through calculators and add to file all for all calcs that can do mutiple groups
477 m->openOutputFile(sumAllFile, outAll);
480 outAll << thisLookup[0]->getLabel() << '\t';
482 //output groups names
483 string outNames = "";
484 for (int j = 0; j < thisLookup.size(); j++) {
485 outNames += thisLookup[j]->getGroup() + "-";
487 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
488 outAll << outNames << '\t';
490 for(int i=0;i<sumCalculators.size();i++){
491 if (sumCalculators[i]->getMultiple() == true) {
492 sumCalculators[i]->getValues(thisLookup);
494 if (m->control_pressed) { outAll.close(); return 1; }
497 sumCalculators[i]->print(outAll);
504 ofstream outputFileHandle;
505 m->openOutputFile(sumFile, outputFileHandle);
507 vector<SharedRAbundVector*> subset;
508 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
510 for (int l = 0; l < k; l++) {
512 outputFileHandle << thisLookup[0]->getLabel() << '\t';
514 subset.clear(); //clear out old pair of sharedrabunds
515 //add new pair of sharedrabunds
516 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
518 //sort groups to be alphanumeric
519 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
520 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
522 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
525 for(int i=0;i<sumCalculators.size();i++) {
527 sumCalculators[i]->getValues(subset); //saves the calculator outputs
529 if (m->control_pressed) { outputFileHandle.close(); return 1; }
531 outputFileHandle << '\t';
532 sumCalculators[i]->print(outputFileHandle);
534 outputFileHandle << endl;
538 outputFileHandle.close();
542 catch(exception& e) {
543 m->errorOut(e, "SummarySharedCommand", "driver");
547 /**************************************************************************************************/