2 * collectsharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "collectsharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharedjabund.h"
15 #include "sharedsorabund.h"
16 #include "sharedjclass.h"
17 #include "sharedsorclass.h"
18 #include "sharedjest.h"
19 #include "sharedsorest.h"
20 #include "sharedthetayc.h"
21 #include "sharedthetan.h"
22 #include "sharedkstest.h"
23 #include "whittaker.h"
24 #include "sharednseqs.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
// Builds the list of parameter names accepted by collect.shared from a fixed
// string array: freq, label, calc, groups, all, outputdir and inputdir.
// NOTE(review): the listing numbers embedded in this file jump 37 -> 39 and
// 40 -> 44, so the lines in between (presumably the try/return/catch
// scaffolding) are not visible here -- confirm against the complete file.
37 vector<string> CollectSharedCommand::getValidParameters(){
39 string AlignArray[] = {"freq","label","calc","groups","all","outputdir","inputdir"};
// The element count is computed as sizeof(array)/sizeof(string), so the vector always matches the literal above.
40 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Reports the exception object `e` through m->errorOut, tagged with the class and function name.
44 m->errorOut(e, "CollectSharedCommand", "getValidParameters");
48 //**********************************************************************************************************************
// Declares the list of required parameter names. The only visible statement
// creates an empty vector<string>, and no entries are added in the lines shown.
// NOTE(review): listing numbers 50 and 52-54 are not visible here (presumably
// the try/return/catch scaffolding) -- confirm against the complete file.
49 vector<string> CollectSharedCommand::getRequiredParameters(){
51 vector<string> myArray;
// Reports the exception object `e` through m->errorOut, tagged with the class and function name.
55 m->errorOut(e, "CollectSharedCommand", "getRequiredParameters");
59 //**********************************************************************************************************************
// Builds the list of required input file types from a fixed string array
// whose single entry is "shared".
// NOTE(review): listing numbers 61 and 64-66 are not visible here (presumably
// the try/return/catch scaffolding) -- confirm against the complete file.
60 vector<string> CollectSharedCommand::getRequiredFiles(){
62 string AlignArray[] = {"shared"};
// The element count is computed as sizeof(array)/sizeof(string), so the vector always matches the literal above.
63 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Reports the exception object `e` through m->errorOut, tagged with the class and function name.
67 m->errorOut(e, "CollectSharedCommand", "getRequiredFiles");
71 //**********************************************************************************************************************
// Default constructor. The visible body only registers the output types: each
// of the 21 keys below is mapped to an empty vector<string> in outputTypes.
// NOTE(review): listing numbers 73-74 and 98-99 are not visible here, so any
// member initialisation done on those lines cannot be confirmed from this view.
72 CollectSharedCommand::CollectSharedCommand(){
75 //initialize outputTypes
// A single empty vector is copied into every map entry.
76 vector<string> tempOutNames;
77 outputTypes["sharedchao"] = tempOutNames;
78 outputTypes["sharedsobs"] = tempOutNames;
79 outputTypes["sharedace"] = tempOutNames;
80 outputTypes["jabund"] = tempOutNames;
81 outputTypes["sorabund"] = tempOutNames;
82 outputTypes["jclass"] = tempOutNames;
83 outputTypes["sorclass"] = tempOutNames;
84 outputTypes["jest"] = tempOutNames;
85 outputTypes["sorest"] = tempOutNames;
86 outputTypes["thetayc"] = tempOutNames;
87 outputTypes["thetan"] = tempOutNames;
88 outputTypes["kstest"] = tempOutNames;
89 outputTypes["whittaker"] = tempOutNames;
90 outputTypes["sharednseqs"] = tempOutNames;
91 outputTypes["ochiai"] = tempOutNames;
92 outputTypes["anderberg"] = tempOutNames;
// NOTE(review): this key is spelled "skulczynski" while the output file written by the
// option-string constructor is named "kulczynski" -- confirm which spelling is intended.
93 outputTypes["skulczynski"] = tempOutNames;
94 outputTypes["kulczynskicody"] = tempOutNames;
95 outputTypes["lennon"] = tempOutNames;
96 outputTypes["morisitahorn"] = tempOutNames;
97 outputTypes["braycurtis"] = tempOutNames;
// Any std::exception raised above is reported through m->errorOut with the class and function name.
100 catch(exception& e) {
101 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
105 //**********************************************************************************************************************
// Constructs the command from the raw option string.
//
// Visible steps, in order:
//   1. "help" prints the help text and marks the command as aborted.
//   2. The option string is parsed and every parameter name is validated against
//      the accepted list; an invalid one sets abort.
//   3. Every output type key is registered with an empty file list.
//   4. Optional parameters (outputdir, label, calc, groups, freq, all) are read,
//      falling back to their defaults when validFile reports "not found".
//   5. When not aborted, one CollectDisplay is created per recognised calculator
//      and its output file is recorded in outputNames and outputTypes.
//
// @param option  raw option text as typed by the user; the literal "help" is special-cased.
//
// Fix applied here: the sharednseqs and skulczynski branches used to record their
// file under outputTypes["shared.nseqs"] / outputTypes["kulczynski"], keys that were
// never registered. map::operator[] silently created those stray keys and left the
// registered ones empty. Both branches now use the registered key, like every
// other branch does.
//
// NOTE(review): the listing numbers embedded in this file show that short lines
// are missing throughout this constructor (for example 109-114, 173, 176, 179,
// 198, 200, 210 and 280-286). Those lines are left untouched; confirm the
// surrounding control flow against the complete file.
106 CollectSharedCommand::CollectSharedCommand(string option) {
108 globaldata = GlobalData::getInstance();
115 //allow user to run help
// validCalculator is created first because help() calls validCalculator->printCalc.
116 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
119 //valid parameters for this command
120 string Array[] = {"freq","label","calc","groups","all","outputdir","inputdir"};
121 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
123 OptionParser parser(option);
124 map<string,string> parameters=parser.getParameters();
126 ValidParameters validParameter;
128 //check to make sure all parameters are valid for command
129 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
130 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
133 //initialize outputTypes
134 vector<string> tempOutNames;
135 outputTypes["sharedchao"] = tempOutNames;
136 outputTypes["sharedsobs"] = tempOutNames;
137 outputTypes["sharedace"] = tempOutNames;
138 outputTypes["jabund"] = tempOutNames;
139 outputTypes["sorabund"] = tempOutNames;
140 outputTypes["jclass"] = tempOutNames;
141 outputTypes["sorclass"] = tempOutNames;
142 outputTypes["jest"] = tempOutNames;
143 outputTypes["sorest"] = tempOutNames;
144 outputTypes["thetayc"] = tempOutNames;
145 outputTypes["thetan"] = tempOutNames;
146 outputTypes["kstest"] = tempOutNames;
147 outputTypes["whittaker"] = tempOutNames;
148 outputTypes["sharednseqs"] = tempOutNames;
149 outputTypes["ochiai"] = tempOutNames;
150 outputTypes["anderberg"] = tempOutNames;
151 outputTypes["skulczynski"] = tempOutNames;
152 outputTypes["kulczynskicody"] = tempOutNames;
153 outputTypes["lennon"] = tempOutNames;
154 outputTypes["morisitahorn"] = tempOutNames;
155 outputTypes["braycurtis"] = tempOutNames;
158 //if the user changes the output directory command factory will send this info to us in the output parameter
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
162 //make sure the user has already run the read.otu command
// With no shared file, both a list file and a group file must have been read; otherwise the command aborts.
163 if (globaldata->getSharedFile() == "") {
164 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
165 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
169 //check for optional parameter and set defaults
170 // ...at some point should add some additional type checking..
171 label = validParameter.validFile(parameters, "label", false);
172 if (label == "not found") { label = ""; }
// A dash-separated label list is split into `labels`; the literal "all" selects every line instead.
174 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
175 else { allLines = 1; }
178 //if the user has not specified any labels use the ones from read.otu
180 allLines = globaldata->allLines;
181 labels = globaldata->labels;
// Both a missing calc parameter and the literal "default" select the same eleven calculators.
184 calc = validParameter.validFile(parameters, "calc", false);
185 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
187 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
189 m->splitAtDash(calc, Estimators);
191 groups = validParameter.validFile(parameters, "groups", false);
192 if (groups == "not found") { groups = ""; }
194 m->splitAtDash(groups, Groups);
// The selection is published through globaldata; execute() reads globaldata->Groups.
196 globaldata->Groups = Groups;
// freq defaults to the text "100"; the conversion into `freq` is not visible in this view.
199 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
202 temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
203 all = m->isTrue(temp);
205 if (abort == false) {
// Output files are written next to the input file unless an output directory was given.
207 if (outputDir == "") { outputDir += m->hasPath(globaldata->inputFileName); }
208 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
209 format = globaldata->getFormat();
212 validCalculator = new ValidCalculators();
213 util = new SharedUtil();
// One display per requested calculator; names rejected by validCalculator create no display.
215 for (i=0; i<Estimators.size(); i++) {
216 if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) {
217 if (Estimators[i] == "sharedchao") {
218 cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
219 outputNames.push_back(fileNameRoot+"shared.chao"); outputTypes["sharedchao"].push_back(fileNameRoot+"shared.chao");
220 }else if (Estimators[i] == "sharedsobs") {
221 cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
222 outputNames.push_back(fileNameRoot+"shared.sobs"); outputTypes["sharedsobs"].push_back(fileNameRoot+"shared.sobs");
223 }else if (Estimators[i] == "sharedace") {
224 cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
225 outputNames.push_back(fileNameRoot+"shared.ace"); outputTypes["sharedace"].push_back(fileNameRoot+"shared.ace");
226 }else if (Estimators[i] == "jabund") {
227 cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
228 outputNames.push_back(fileNameRoot+"jabund"); outputTypes["jabund"].push_back(fileNameRoot+"jabund");
229 }else if (Estimators[i] == "sorabund") {
230 cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
231 outputNames.push_back(fileNameRoot+"sorabund"); outputTypes["sorabund"].push_back(fileNameRoot+"sorabund");
232 }else if (Estimators[i] == "jclass") {
233 cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
234 outputNames.push_back(fileNameRoot+"jclass"); outputTypes["jclass"].push_back(fileNameRoot+"jclass");
235 }else if (Estimators[i] == "sorclass") {
236 cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
237 outputNames.push_back(fileNameRoot+"sorclass"); outputTypes["sorclass"].push_back(fileNameRoot+"sorclass");
238 }else if (Estimators[i] == "jest") {
239 cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
240 outputNames.push_back(fileNameRoot+"jest"); outputTypes["jest"].push_back(fileNameRoot+"jest");
241 }else if (Estimators[i] == "sorest") {
242 cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
243 outputNames.push_back(fileNameRoot+"sorest"); outputTypes["sorest"].push_back(fileNameRoot+"sorest");
244 }else if (Estimators[i] == "thetayc") {
245 cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
246 outputNames.push_back(fileNameRoot+"thetayc"); outputTypes["thetayc"].push_back(fileNameRoot+"thetayc");
247 }else if (Estimators[i] == "thetan") {
248 cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
249 outputNames.push_back(fileNameRoot+"thetan"); outputTypes["thetan"].push_back(fileNameRoot+"thetan");
250 }else if (Estimators[i] == "kstest") {
251 cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
252 outputNames.push_back(fileNameRoot+"kstest"); outputTypes["kstest"].push_back(fileNameRoot+"kstest");
253 }else if (Estimators[i] == "whittaker") {
254 cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
255 outputNames.push_back(fileNameRoot+"whittaker"); outputTypes["whittaker"].push_back(fileNameRoot+"whittaker");
256 }else if (Estimators[i] == "sharednseqs") {
257 cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
// The file is named "shared.nseqs", but it is recorded under the registered type key "sharednseqs".
258 outputNames.push_back(fileNameRoot+"shared.nseqs"); outputTypes["sharednseqs"].push_back(fileNameRoot+"shared.nseqs");
259 }else if (Estimators[i] == "ochiai") {
260 cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
261 outputNames.push_back(fileNameRoot+"ochiai"); outputTypes["ochiai"].push_back(fileNameRoot+"ochiai");
262 }else if (Estimators[i] == "anderberg") {
263 cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
264 outputNames.push_back(fileNameRoot+"anderberg"); outputTypes["anderberg"].push_back(fileNameRoot+"anderberg");
// NOTE(review): this branch matches the name "skulczynski" while the file is named "kulczynski";
// confirm against ValidCalculators which calculator name is actually accepted.
265 }else if (Estimators[i] == "skulczynski") {
266 cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
// Recorded under the registered type key "skulczynski", which is also the name this branch matches.
267 outputNames.push_back(fileNameRoot+"kulczynski"); outputTypes["skulczynski"].push_back(fileNameRoot+"kulczynski");
268 }else if (Estimators[i] == "kulczynskicody") {
269 cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
270 outputNames.push_back(fileNameRoot+"kulczynskicody"); outputTypes["kulczynskicody"].push_back(fileNameRoot+"kulczynskicody");
271 }else if (Estimators[i] == "lennon") {
272 cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
273 outputNames.push_back(fileNameRoot+"lennon"); outputTypes["lennon"].push_back(fileNameRoot+"lennon");
274 }else if (Estimators[i] == "morisitahorn") {
275 cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
276 outputNames.push_back(fileNameRoot+"morisitahorn"); outputTypes["morisitahorn"].push_back(fileNameRoot+"morisitahorn");
277 }else if (Estimators[i] == "braycurtis") {
278 cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
279 outputNames.push_back(fileNameRoot+"braycurtis"); outputTypes["braycurtis"].push_back(fileNameRoot+"braycurtis");
// Any std::exception raised above is reported through m->errorOut with the class and function name.
287 catch(exception& e) {
288 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
292 //**********************************************************************************************************************
// Prints the usage text for collect.shared through m->mothurOut, and lists the
// available "shared" calculators through validCalculator->printCalc.
//
// Fixes applied to the text:
//   - the example and the stated defaults named a calculator "sorensonabund";
//     the default calc string and the calculator dispatch in the option-string
//     constructor both use "sorabund", so the help now shows that name;
//   - the parameter summary now includes "all", which is an accepted parameter
//     and is described further down in this same help text.
//
// validCalculator must already point at a ValidCalculators object when this is
// called; the option-string constructor creates one right before calling help().
// NOTE(review): listing numbers 295, 310-311 and 314-316 are not visible in this
// view (presumably try/brace scaffolding) and are left untouched.
294 void CollectSharedCommand::help(){
296 m->mothurOut("The collect.shared command can only be executed after a successful read.otu command.\n");
297 m->mothurOut("The collect.shared command parameters are label, freq, calc, groups and all. No parameters are required \n");
298 m->mothurOut("The collect.shared command should be in the following format: \n");
299 m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n");
300 m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
301 m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n");
302 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
303 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
// The calculator list is written to cout directly rather than through m->mothurOut.
304 validCalculator->printCalc("shared", cout);
305 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
306 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
307 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
308 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
309 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
// Any std::exception raised above is reported through m->errorOut with the class and function name.
312 catch(exception& e) {
313 m->errorOut(e, "CollectSharedCommand", "help");
318 //**********************************************************************************************************************
// Destructor. When the command was not aborted it deletes the input reader and
// clears the globaldata->ginput pointer that referred to it; the visible lines
// also delete validCalculator and reset globaldata->gorder to NULL.
// NOTE(review): listing numbers 323-324 and 327-328 are not visible here, so
// which statements sit inside the abort check cannot be confirmed from this view.
// NOTE(review): the option-string constructor allocates validCalculator on the
// "help" path and then sets abort = true; if the delete below is inside the
// abort check, that object is never freed -- confirm against the complete file.
320 CollectSharedCommand::~CollectSharedCommand(){
321 if (abort == false) {
// ginput is nulled so globaldata does not keep a pointer to the freed reader.
322 delete input; globaldata->ginput = NULL;
325 delete validCalculator;
326 globaldata->gorder = NULL;
330 //**********************************************************************************************************************
// Runs the collect.shared command. It reads the input through ReadOTUFile, then
// walks the SharedOrderVector objects handed out by the input reader and builds
// a collector's curve (Collect::getSharedCurve) for each label that is wanted.
// Returns 0 on the early-exit paths visible below.
// NOTE(review): the listing numbers embedded in this file show that many short
// lines are missing from this function (for example 360, 362-364, 371-372,
// 380-381, 395-397, 409-411, 419-420, 440-445 and 455-458) -- presumably
// braces, deletes and returns. Confirm the exact control flow and ownership
// against the complete file before changing anything here.
332 int CollectSharedCommand::execute(){
// Nothing to do when the constructor flagged the command as aborted.
335 if (abort == true) { return 0; }
337 //if the users entered no valid calculators don't execute command
338 if (cDisplays.size() == 0) { return 0; }
// Pass the "all" option on to every display.
339 for(int i=0;i<cDisplays.size();i++){ cDisplays[i]->setAll(all); }
341 read = new ReadOTUFile(globaldata->inputFileName);
342 read->read(&*globaldata);
// The reader is picked up from globaldata after read->read(); presumably read() sets ginput -- confirm.
344 input = globaldata->ginput;
345 order = input->getSharedOrderVector();
346 string lastLabel = order->getLabel();
348 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels records labels already run; userLabels is a working copy that shrinks as labels are handled.
349 set<string> processedLabels;
350 set<string> userLabels = labels;
353 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
354 util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
// Keep going while the reader still yields a vector and either every line is wanted or requested labels remain.
356 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
// On interrupt: remove the output files written so far, forget the output types and free the displays.
357 if (m->control_pressed) {
358 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
359 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
361 globaldata->Groups.clear();
// Process this vector when every line is wanted or its label was requested.
365 if(allLines == 1 || labels.count(order->getLabel()) == 1){
367 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
368 //create collectors curve
369 cCurve = new Collect(order, cDisplays);
370 cCurve->getSharedCurve(freq);
373 processedLabels.insert(order->getLabel());
374 userLabels.erase(order->getLabel());
377 //you have a label the user want that is smaller than this label and the last label has not already been processed
378 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
// Remember the current label so it can be restored after re-reading the previous one.
379 string saveLabel = order->getLabel();
382 order = input->getSharedOrderVector(lastLabel);
384 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
385 //create collectors curve
386 cCurve = new Collect(order, cDisplays);
387 cCurve->getSharedCurve(freq);
390 processedLabels.insert(order->getLabel());
391 userLabels.erase(order->getLabel());
393 //restore real lastlabel to save below
394 order->setLabel(saveLabel);
398 lastLabel = order->getLabel();
400 //get next line to process
402 order = input->getSharedOrderVector();
// Same interrupt cleanup as inside the loop.
405 if (m->control_pressed) {
406 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
407 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
408 globaldata->Groups.clear();
412 //output error messages about any remaining user labels
413 set<string>::iterator it;
// needToRun starts false; the assignment that sets it true is not visible in this view.
414 bool needToRun = false;
415 for (it = userLabels.begin(); it != userLabels.end(); it++) {
416 m->mothurOut("Your file does not include the label " + *it);
// The message depends on whether lastLabel has already been processed.
417 if (processedLabels.count(lastLabel) != 1) {
418 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
421 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
425 //run last label if you need to
426 if (needToRun == true) {
// Drop the current vector before re-reading the one for lastLabel.
427 if (order != NULL) { delete order; }
428 order = input->getSharedOrderVector(lastLabel);
430 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
431 cCurve = new Collect(order, cDisplays);
432 cCurve->getSharedCurve(freq);
// Same interrupt cleanup as above.
435 if (m->control_pressed) {
436 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
437 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
439 globaldata->Groups.clear();
// Normal completion: free every display.
446 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
448 //reset groups parameter
449 globaldata->Groups.clear();
// List every output file that was produced.
451 m->mothurOutEndLine();
452 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
453 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
454 m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut with the class and function name.
459 catch(exception& e) {
460 m->errorOut(e, "CollectSharedCommand", "execute");
465 /***********************************************************/