2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
38 SummarySharedCommand::SummarySharedCommand(string option) {
40 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50 //valid paramters for this command
51 string Array[] = {"label","calc","groups","all","outputdir","inputdir"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
59 //check to make sure all parameters are valid for command
60 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
61 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
64 //make sure the user has already run the read.otu command
65 if (globaldata->getSharedFile() == "") {
66 m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true;
69 //if the user changes the output directory command factory will send this info to us in the output parameter
70 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
72 outputDir += hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it
75 //check for optional parameter and set defaults
76 // ...at some point should added some additional type checking...
77 label = validParameter.validFile(parameters, "label", false);
78 if (label == "not found") { label = ""; }
80 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
81 else { allLines = 1; }
84 //if the user has not specified any labels use the ones from read.otu
86 allLines = globaldata->allLines;
87 labels = globaldata->labels;
90 calc = validParameter.validFile(parameters, "calc", false);
91 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
93 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 splitAtDash(calc, Estimators);
97 groups = validParameter.validFile(parameters, "groups", false);
98 if (groups == "not found") { groups = ""; }
100 splitAtDash(groups, Groups);
101 globaldata->Groups = Groups;
104 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
107 if (abort == false) {
109 validCalculator = new ValidCalculators();
112 for (i=0; i<Estimators.size(); i++) {
113 if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) {
114 if (Estimators[i] == "sharedsobs") {
115 sumCalculators.push_back(new SharedSobsCS());
116 }else if (Estimators[i] == "sharedchao") {
117 sumCalculators.push_back(new SharedChao1());
118 }else if (Estimators[i] == "sharedace") {
119 sumCalculators.push_back(new SharedAce());
120 }else if (Estimators[i] == "jabund") {
121 sumCalculators.push_back(new JAbund());
122 }else if (Estimators[i] == "sorabund") {
123 sumCalculators.push_back(new SorAbund());
124 }else if (Estimators[i] == "jclass") {
125 sumCalculators.push_back(new Jclass());
126 }else if (Estimators[i] == "sorclass") {
127 sumCalculators.push_back(new SorClass());
128 }else if (Estimators[i] == "jest") {
129 sumCalculators.push_back(new Jest());
130 }else if (Estimators[i] == "sorest") {
131 sumCalculators.push_back(new SorEst());
132 }else if (Estimators[i] == "thetayc") {
133 sumCalculators.push_back(new ThetaYC());
134 }else if (Estimators[i] == "thetan") {
135 sumCalculators.push_back(new ThetaN());
136 }else if (Estimators[i] == "kstest") {
137 sumCalculators.push_back(new KSTest());
138 }else if (Estimators[i] == "sharednseqs") {
139 sumCalculators.push_back(new SharedNSeqs());
140 }else if (Estimators[i] == "ochiai") {
141 sumCalculators.push_back(new Ochiai());
142 }else if (Estimators[i] == "anderberg") {
143 sumCalculators.push_back(new Anderberg());
144 }else if (Estimators[i] == "kulczynski") {
145 sumCalculators.push_back(new Kulczynski());
146 }else if (Estimators[i] == "kulczynskicody") {
147 sumCalculators.push_back(new KulczynskiCody());
148 }else if (Estimators[i] == "lennon") {
149 sumCalculators.push_back(new Lennon());
150 }else if (Estimators[i] == "morisitahorn") {
151 sumCalculators.push_back(new MorHorn());
152 }else if (Estimators[i] == "braycurtis") {
153 sumCalculators.push_back(new BrayCurtis());
154 }else if (Estimators[i] == "whittaker") {
155 sumCalculators.push_back(new Whittaker());
160 outputFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "shared.summary";
161 openOutputFile(outputFileName, outputFileHandle);
162 outputNames.push_back(outputFileName);
168 catch(exception& e) {
169 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
174 //**********************************************************************************************************************
176 void SummarySharedCommand::help(){
178 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
179 m->mothurOut("The summary.shared command parameters are label, calc and all. No parameters are required.\n");
180 m->mothurOut("The summary.shared command should be in the following format: \n");
181 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
182 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
183 validCalculator->printCalc("sharedsummary", cout);
184 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
185 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
186 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
187 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
188 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
189 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
190 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
192 catch(exception& e) {
193 m->errorOut(e, "SummarySharedCommand", "help");
198 //**********************************************************************************************************************
200 SummarySharedCommand::~SummarySharedCommand(){
201 if (abort == false) {
203 delete validCalculator;
207 //**********************************************************************************************************************
209 int SummarySharedCommand::execute(){
212 if (abort == true) { return 0; }
214 //if the users entered no valid calculators don't execute command
215 if (sumCalculators.size() == 0) { return 0; }
216 //check if any calcs can do multiples
219 for (int i = 0; i < sumCalculators.size(); i++) {
220 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
226 read = new ReadOTUFile(globaldata->inputFileName);
227 read->read(&*globaldata);
229 input = globaldata->ginput;
230 lookup = input->getSharedRAbundVectors();
231 string lastLabel = lookup[0]->getLabel();
233 //output estimator names as column headers
234 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
235 for(int i=0;i<sumCalculators.size();i++){
236 outputFileHandle << '\t' << sumCalculators[i]->getName();
238 outputFileHandle << endl;
240 //create file and put column headers for multiple groups file
242 outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
243 openOutputFile(outAllFileName, outAll);
244 outputNames.push_back(outAllFileName);
246 outAll << "label" <<'\t' << "comparison" << '\t';
247 for(int i=0;i<sumCalculators.size();i++){
248 if (sumCalculators[i]->getMultiple() == true) {
249 outAll << '\t' << sumCalculators[i]->getName();
255 if (lookup.size() < 2) {
256 m->mothurOut("I cannot run the command without at least 2 valid groups.");
257 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
259 //close files and clean up
260 outputFileHandle.close(); remove(outputFileName.c_str());
261 if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); }
263 //if you only have 2 groups you don't need a .sharedmultiple file
264 }else if ((lookup.size() == 2) && (mult == true)) {
267 remove(outAllFileName.c_str());
268 outputNames.pop_back();
271 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
272 set<string> processedLabels;
273 set<string> userLabels = labels;
275 //as long as you are not at the end of the file or done wih the lines you want
276 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
278 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
279 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
282 processedLabels.insert(lookup[0]->getLabel());
283 userLabels.erase(lookup[0]->getLabel());
286 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
287 string saveLabel = lookup[0]->getLabel();
289 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
290 lookup = input->getSharedRAbundVectors(lastLabel);
292 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
295 processedLabels.insert(lookup[0]->getLabel());
296 userLabels.erase(lookup[0]->getLabel());
298 //restore real lastlabel to save below
299 lookup[0]->setLabel(saveLabel);
302 lastLabel = lookup[0]->getLabel();
304 //get next line to process
305 //prevent memory leak
306 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
307 lookup = input->getSharedRAbundVectors();
310 //output error messages about any remaining user labels
311 set<string>::iterator it;
312 bool needToRun = false;
313 for (it = userLabels.begin(); it != userLabels.end(); it++) {
314 m->mothurOut("Your file does not include the label " + *it);
315 if (processedLabels.count(lastLabel) != 1) {
316 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
319 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
323 //run last label if you need to
324 if (needToRun == true) {
325 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
326 lookup = input->getSharedRAbundVectors(lastLabel);
328 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
330 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
334 //reset groups parameter
335 globaldata->Groups.clear();
338 outputFileHandle.close();
339 if (mult == true) { outAll.close(); }
341 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
343 delete input; globaldata->ginput = NULL;
345 m->mothurOutEndLine();
346 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
347 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
348 m->mothurOutEndLine();
352 catch(exception& e) {
353 m->errorOut(e, "SummarySharedCommand", "execute");
358 /***********************************************************/
359 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
361 //loop through calculators and add to file all for all calcs that can do mutiple groups
364 outAll << thisLookup[0]->getLabel() << '\t';
366 //output groups names
367 string outNames = "";
368 for (int j = 0; j < thisLookup.size(); j++) {
369 outNames += thisLookup[j]->getGroup() + "-";
371 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
372 outAll << outNames << '\t';
374 for(int i=0;i<sumCalculators.size();i++){
375 if (sumCalculators[i]->getMultiple() == true) {
376 sumCalculators[i]->getValues(thisLookup);
378 sumCalculators[i]->print(outAll);
385 vector<SharedRAbundVector*> subset;
386 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
387 for (int l = n; l < thisLookup.size(); l++) {
389 outputFileHandle << thisLookup[0]->getLabel() << '\t';
391 subset.clear(); //clear out old pair of sharedrabunds
392 //add new pair of sharedrabunds
393 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
395 //sort groups to be alphanumeric
396 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
397 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
399 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
402 for(int i=0;i<sumCalculators.size();i++) {
404 sumCalculators[i]->getValues(subset); //saves the calculator outputs
405 outputFileHandle << '\t';
406 sumCalculators[i]->print(outputFileHandle);
408 outputFileHandle << endl;
414 catch(exception& e) {
415 m->errorOut(e, "SummarySharedCommand", "process");
420 /***********************************************************/