2 * collectsharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "collectsharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharedjabund.h"
15 #include "sharedsorabund.h"
16 #include "sharedjclass.h"
17 #include "sharedsorclass.h"
18 #include "sharedjest.h"
19 #include "sharedsorest.h"
20 #include "sharedthetayc.h"
21 #include "sharedthetan.h"
22 #include "sharedkstest.h"
23 #include "whittaker.h"
24 #include "sharednseqs.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
37 //**********************************************************************************************************************
39 CollectSharedCommand::CollectSharedCommand(string option) {
41 globaldata = GlobalData::getInstance();
48 //allow user to run help
49 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
52 //valid paramters for this command
53 string Array[] = {"freq","label","calc","groups","all","outputdir","inputdir"};
54 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56 OptionParser parser(option);
57 map<string,string> parameters=parser.getParameters();
59 ValidParameters validParameter;
61 //check to make sure all parameters are valid for command
62 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
63 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
66 //if the user changes the output directory command factory will send this info to us in the output parameter
67 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
70 //make sure the user has already run the read.otu command
71 if (globaldata->getSharedFile() == "") {
72 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
73 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
77 //check for optional parameter and set defaults
78 // ...at some point should added some additional type checking..
79 label = validParameter.validFile(parameters, "label", false);
80 if (label == "not found") { label = ""; }
82 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
83 else { allLines = 1; }
86 //if the user has not specified any labels use the ones from read.otu
88 allLines = globaldata->allLines;
89 labels = globaldata->labels;
92 calc = validParameter.validFile(parameters, "calc", false);
93 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
97 splitAtDash(calc, Estimators);
99 groups = validParameter.validFile(parameters, "groups", false);
100 if (groups == "not found") { groups = ""; }
102 splitAtDash(groups, Groups);
104 globaldata->Groups = Groups;
107 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
110 temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
113 if (abort == false) {
115 if (outputDir == "") { outputDir += hasPath(globaldata->inputFileName); }
116 string fileNameRoot = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
117 format = globaldata->getFormat();
120 validCalculator = new ValidCalculators();
121 util = new SharedUtil();
123 for (i=0; i<Estimators.size(); i++) {
124 if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) {
125 if (Estimators[i] == "sharedchao") {
126 cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
127 outputNames.push_back(fileNameRoot+"shared.chao");
128 }else if (Estimators[i] == "sharedsobs") {
129 cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
130 outputNames.push_back(fileNameRoot+"shared.sobs");
131 }else if (Estimators[i] == "sharedace") {
132 cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
133 outputNames.push_back(fileNameRoot+"shared.ace");
134 }else if (Estimators[i] == "jabund") {
135 cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
136 outputNames.push_back(fileNameRoot+"jabund");
137 }else if (Estimators[i] == "sorabund") {
138 cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
139 outputNames.push_back(fileNameRoot+"sorabund");
140 }else if (Estimators[i] == "jclass") {
141 cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
142 outputNames.push_back(fileNameRoot+"jclass");
143 }else if (Estimators[i] == "sorclass") {
144 cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
145 outputNames.push_back(fileNameRoot+"sorclass");
146 }else if (Estimators[i] == "jest") {
147 cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
148 outputNames.push_back(fileNameRoot+"jest");
149 }else if (Estimators[i] == "sorest") {
150 cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
151 outputNames.push_back(fileNameRoot+"sorest");
152 }else if (Estimators[i] == "thetayc") {
153 cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
154 outputNames.push_back(fileNameRoot+"thetayc");
155 }else if (Estimators[i] == "thetan") {
156 cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
157 outputNames.push_back(fileNameRoot+"thetan");
158 }else if (Estimators[i] == "kstest") {
159 cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
160 outputNames.push_back(fileNameRoot+"kstest");
161 }else if (Estimators[i] == "whittaker") {
162 cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
163 outputNames.push_back(fileNameRoot+"whittaker");
164 }else if (Estimators[i] == "sharednseqs") {
165 cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
166 outputNames.push_back(fileNameRoot+"shared.nseqs");
167 }else if (Estimators[i] == "ochiai") {
168 cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
169 outputNames.push_back(fileNameRoot+"ochiai");
170 }else if (Estimators[i] == "anderberg") {
171 cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
172 outputNames.push_back(fileNameRoot+"anderberg");
173 }else if (Estimators[i] == "skulczynski") {
174 cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
175 outputNames.push_back(fileNameRoot+"kulczynski");
176 }else if (Estimators[i] == "kulczynskicody") {
177 cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
178 outputNames.push_back(fileNameRoot+"kulczynskicody");
179 }else if (Estimators[i] == "lennon") {
180 cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
181 outputNames.push_back(fileNameRoot+"lennon");
182 }else if (Estimators[i] == "morisitahorn") {
183 cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
184 outputNames.push_back(fileNameRoot+"morisitahorn");
185 }else if (Estimators[i] == "braycurtis") {
186 cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
187 outputNames.push_back(fileNameRoot+"braycurtis");
195 catch(exception& e) {
196 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
200 //**********************************************************************************************************************
202 void CollectSharedCommand::help(){
204 m->mothurOut("The collect.shared command can only be executed after a successful read.otu command.\n");
205 m->mothurOut("The collect.shared command parameters are label, freq, calc and groups. No parameters are required \n");
206 m->mothurOut("The collect.shared command should be in the following format: \n");
207 m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n");
208 m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
209 m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n");
210 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
211 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
212 validCalculator->printCalc("shared", cout);
213 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
214 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
215 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
216 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
217 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
220 catch(exception& e) {
221 m->errorOut(e, "CollectSharedCommand", "help");
226 //**********************************************************************************************************************
228 CollectSharedCommand::~CollectSharedCommand(){
229 if (abort == false) {
230 delete input; globaldata->ginput = NULL;
233 delete validCalculator;
234 globaldata->gorder = NULL;
238 //**********************************************************************************************************************
240 int CollectSharedCommand::execute(){
243 if (abort == true) { return 0; }
245 //if the users entered no valid calculators don't execute command
246 if (cDisplays.size() == 0) { return 0; }
247 for(int i=0;i<cDisplays.size();i++){ cDisplays[i]->setAll(all); }
249 read = new ReadOTUFile(globaldata->inputFileName);
250 read->read(&*globaldata);
252 input = globaldata->ginput;
253 order = input->getSharedOrderVector();
254 string lastLabel = order->getLabel();
256 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
257 set<string> processedLabels;
258 set<string> userLabels = labels;
261 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
262 util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
264 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
265 if (m->control_pressed) {
266 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
267 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
269 globaldata->Groups.clear();
273 if(allLines == 1 || labels.count(order->getLabel()) == 1){
275 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
276 //create collectors curve
277 cCurve = new Collect(order, cDisplays);
278 cCurve->getSharedCurve(freq);
281 processedLabels.insert(order->getLabel());
282 userLabels.erase(order->getLabel());
285 //you have a label the user want that is smaller than this label and the last label has not already been processed
286 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
287 string saveLabel = order->getLabel();
290 order = input->getSharedOrderVector(lastLabel);
292 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
293 //create collectors curve
294 cCurve = new Collect(order, cDisplays);
295 cCurve->getSharedCurve(freq);
298 processedLabels.insert(order->getLabel());
299 userLabels.erase(order->getLabel());
301 //restore real lastlabel to save below
302 order->setLabel(saveLabel);
306 lastLabel = order->getLabel();
308 //get next line to process
310 order = input->getSharedOrderVector();
313 if (m->control_pressed) {
314 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
315 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
316 globaldata->Groups.clear();
320 //output error messages about any remaining user labels
321 set<string>::iterator it;
322 bool needToRun = false;
323 for (it = userLabels.begin(); it != userLabels.end(); it++) {
324 m->mothurOut("Your file does not include the label " + *it);
325 if (processedLabels.count(lastLabel) != 1) {
326 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
329 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
333 //run last label if you need to
334 if (needToRun == true) {
335 if (order != NULL) { delete order; }
336 order = input->getSharedOrderVector(lastLabel);
338 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
339 cCurve = new Collect(order, cDisplays);
340 cCurve->getSharedCurve(freq);
343 if (m->control_pressed) {
344 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
345 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
347 globaldata->Groups.clear();
354 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
356 //reset groups parameter
357 globaldata->Groups.clear();
359 m->mothurOutEndLine();
360 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
361 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
362 m->mothurOutEndLine();
367 catch(exception& e) {
368 m->errorOut(e, "CollectSharedCommand", "execute");
373 /***********************************************************/