2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
38 SummarySharedCommand::SummarySharedCommand(string option){
40 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50 //valid paramters for this command
51 string Array[] = {"label","calc","groups","all","outputdir","inputdir"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
59 //check to make sure all parameters are valid for command
60 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
61 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
64 //make sure the user has already run the read.otu command
65 if (globaldata->getSharedFile() == "") {
66 mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); mothurOutEndLine(); abort = true;
69 //if the user changes the output directory command factory will send this info to us in the output parameter
70 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
72 outputDir += hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it
75 //check for optional parameter and set defaults
76 // ...at some point should added some additional type checking...
77 label = validParameter.validFile(parameters, "label", false);
78 if (label == "not found") { label = ""; }
80 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
81 else { allLines = 1; }
84 //if the user has not specified any labels use the ones from read.otu
86 allLines = globaldata->allLines;
87 labels = globaldata->labels;
90 calc = validParameter.validFile(parameters, "calc", false);
91 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
93 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 splitAtDash(calc, Estimators);
97 groups = validParameter.validFile(parameters, "groups", false);
98 if (groups == "not found") { groups = ""; }
100 splitAtDash(groups, Groups);
101 globaldata->Groups = Groups;
104 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
107 if (abort == false) {
109 validCalculator = new ValidCalculators();
112 for (i=0; i<Estimators.size(); i++) {
113 if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) {
114 if (Estimators[i] == "sharedsobs") {
115 sumCalculators.push_back(new SharedSobsCS());
116 }else if (Estimators[i] == "sharedchao") {
117 sumCalculators.push_back(new SharedChao1());
118 }else if (Estimators[i] == "sharedace") {
119 sumCalculators.push_back(new SharedAce());
120 }else if (Estimators[i] == "jabund") {
121 sumCalculators.push_back(new JAbund());
122 }else if (Estimators[i] == "sorabund") {
123 sumCalculators.push_back(new SorAbund());
124 }else if (Estimators[i] == "jclass") {
125 sumCalculators.push_back(new Jclass());
126 }else if (Estimators[i] == "sorclass") {
127 sumCalculators.push_back(new SorClass());
128 }else if (Estimators[i] == "jest") {
129 sumCalculators.push_back(new Jest());
130 }else if (Estimators[i] == "sorest") {
131 sumCalculators.push_back(new SorEst());
132 }else if (Estimators[i] == "thetayc") {
133 sumCalculators.push_back(new ThetaYC());
134 }else if (Estimators[i] == "thetan") {
135 sumCalculators.push_back(new ThetaN());
136 }else if (Estimators[i] == "kstest") {
137 sumCalculators.push_back(new KSTest());
138 }else if (Estimators[i] == "sharednseqs") {
139 sumCalculators.push_back(new SharedNSeqs());
140 }else if (Estimators[i] == "ochiai") {
141 sumCalculators.push_back(new Ochiai());
142 }else if (Estimators[i] == "anderberg") {
143 sumCalculators.push_back(new Anderberg());
144 }else if (Estimators[i] == "kulczynski") {
145 sumCalculators.push_back(new Kulczynski());
146 }else if (Estimators[i] == "kulczynskicody") {
147 sumCalculators.push_back(new KulczynskiCody());
148 }else if (Estimators[i] == "lennon") {
149 sumCalculators.push_back(new Lennon());
150 }else if (Estimators[i] == "morisitahorn") {
151 sumCalculators.push_back(new MorHorn());
152 }else if (Estimators[i] == "braycurtis") {
153 sumCalculators.push_back(new BrayCurtis());
154 }else if (Estimators[i] == "whittaker") {
155 sumCalculators.push_back(new Whittaker());
160 outputFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "shared.summary";
161 openOutputFile(outputFileName, outputFileHandle);
166 catch(exception& e) {
167 errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
172 //**********************************************************************************************************************
174 void SummarySharedCommand::help(){
176 mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
177 mothurOut("The summary.shared command parameters are label, calc and all. No parameters are required.\n");
178 mothurOut("The summary.shared command should be in the following format: \n");
179 mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
180 mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
181 validCalculator->printCalc("sharedsummary", cout);
182 mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
183 mothurOut("The default value for groups is all the groups in your groupfile.\n");
184 mothurOut("The label parameter is used to analyze specific labels in your input.\n");
185 mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
186 mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
187 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
188 mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
190 catch(exception& e) {
191 errorOut(e, "SummarySharedCommand", "help");
196 //**********************************************************************************************************************
198 SummarySharedCommand::~SummarySharedCommand(){
199 if (abort == false) {
201 delete validCalculator;
205 //**********************************************************************************************************************
207 int SummarySharedCommand::execute(){
210 if (abort == true) { return 0; }
212 //if the users entered no valid calculators don't execute command
213 if (sumCalculators.size() == 0) { return 0; }
214 //check if any calcs can do multiples
217 for (int i = 0; i < sumCalculators.size(); i++) {
218 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
224 read = new ReadOTUFile(globaldata->inputFileName);
225 read->read(&*globaldata);
227 input = globaldata->ginput;
228 lookup = input->getSharedRAbundVectors();
229 string lastLabel = lookup[0]->getLabel();
231 //output estimator names as column headers
232 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
233 for(int i=0;i<sumCalculators.size();i++){
234 outputFileHandle << '\t' << sumCalculators[i]->getName();
236 outputFileHandle << endl;
238 //create file and put column headers for multiple groups file
240 outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
241 openOutputFile(outAllFileName, outAll);
243 outAll << "label" <<'\t' << "comparison" << '\t';
244 for(int i=0;i<sumCalculators.size();i++){
245 if (sumCalculators[i]->getMultiple() == true) {
246 outAll << '\t' << sumCalculators[i]->getName();
252 if (lookup.size() < 2) {
253 mothurOut("I cannot run the command without at least 2 valid groups.");
254 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
256 //close files and clean up
257 outputFileHandle.close(); remove(outputFileName.c_str());
258 if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); }
260 //if you only have 2 groups you don't need a .sharedmultiple file
261 }else if ((lookup.size() == 2) && (mult == true)) {
264 remove(outAllFileName.c_str());
267 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
268 set<string> processedLabels;
269 set<string> userLabels = labels;
271 //as long as you are not at the end of the file or done wih the lines you want
272 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
274 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
275 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
278 processedLabels.insert(lookup[0]->getLabel());
279 userLabels.erase(lookup[0]->getLabel());
282 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
283 string saveLabel = lookup[0]->getLabel();
285 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
286 lookup = input->getSharedRAbundVectors(lastLabel);
288 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
291 processedLabels.insert(lookup[0]->getLabel());
292 userLabels.erase(lookup[0]->getLabel());
294 //restore real lastlabel to save below
295 lookup[0]->setLabel(saveLabel);
298 lastLabel = lookup[0]->getLabel();
300 //get next line to process
301 //prevent memory leak
302 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
303 lookup = input->getSharedRAbundVectors();
306 //output error messages about any remaining user labels
307 set<string>::iterator it;
308 bool needToRun = false;
309 for (it = userLabels.begin(); it != userLabels.end(); it++) {
310 mothurOut("Your file does not include the label " + *it);
311 if (processedLabels.count(lastLabel) != 1) {
312 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
315 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
319 //run last label if you need to
320 if (needToRun == true) {
321 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
322 lookup = input->getSharedRAbundVectors(lastLabel);
324 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
326 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
330 //reset groups parameter
331 globaldata->Groups.clear();
334 outputFileHandle.close();
335 if (mult == true) { outAll.close(); }
337 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
339 delete input; globaldata->ginput = NULL;
343 catch(exception& e) {
344 errorOut(e, "SummarySharedCommand", "execute");
349 /***********************************************************/
350 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
352 //loop through calculators and add to file all for all calcs that can do mutiple groups
355 outAll << thisLookup[0]->getLabel() << '\t';
357 //output groups names
358 string outNames = "";
359 for (int j = 0; j < thisLookup.size(); j++) {
360 outNames += thisLookup[j]->getGroup() + "-";
362 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
363 outAll << outNames << '\t';
365 for(int i=0;i<sumCalculators.size();i++){
366 if (sumCalculators[i]->getMultiple() == true) {
367 sumCalculators[i]->getValues(thisLookup);
369 sumCalculators[i]->print(outAll);
376 vector<SharedRAbundVector*> subset;
377 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
378 for (int l = n; l < thisLookup.size(); l++) {
380 outputFileHandle << thisLookup[0]->getLabel() << '\t';
382 subset.clear(); //clear out old pair of sharedrabunds
383 //add new pair of sharedrabunds
384 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
386 //sort groups to be alphanumeric
387 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
388 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
390 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
393 for(int i=0;i<sumCalculators.size();i++) {
395 sumCalculators[i]->getValues(subset); //saves the calculator outputs
396 outputFileHandle << '\t';
397 sumCalculators[i]->print(outputFileHandle);
399 outputFileHandle << endl;
405 catch(exception& e) {
406 errorOut(e, "SummarySharedCommand", "process");
411 /***********************************************************/