2 * rarefactsharedcommand.cpp
5 * Created by Sarah Westcott on 1/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> RareFactSharedCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
20 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
24 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "RareFactSharedCommand", "setParameters");
39 //**********************************************************************************************************************
40 string RareFactSharedCommand::getHelpString(){
42 string helpString = "";
43 ValidCalculators validCalculator;
44 helpString += "The collect.shared command parameters are shared, label, freq, calc and groups. shared is required if there is no current sharedfile. \n";
45 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc. shared is required if there is no current sharedfile. \n";
46 helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
47 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48 helpString += "The rarefaction command should be in the following format: \n";
49 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
50 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
51 helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
52 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
53 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
54 helpString += validCalculator.printCalc("sharedrarefaction");
55 helpString += "The label parameter is used to analyze specific labels in your input.\n";
56 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
57 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
61 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 RareFactSharedCommand::RareFactSharedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["sharedrarefaction"] = tempOutNames;
73 outputTypes["sharedr_nseqs"] = tempOutNames;
76 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
80 //**********************************************************************************************************************
82 RareFactSharedCommand::RareFactSharedCommand(string option) {
84 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 vector<string> myArray = setParameters();
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
96 map<string,string>::iterator it;
98 ValidParameters validParameter;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 //initialize outputTypes
106 vector<string> tempOutNames;
107 outputTypes["sharedrarefaction"] = tempOutNames;
108 outputTypes["sharedr_nseqs"] = tempOutNames;
110 //if the user changes the input directory command factory will send this info to us in the output parameter
111 string inputDir = validParameter.validFile(parameters, "inputdir", false);
112 if (inputDir == "not found"){ inputDir = ""; }
115 it = parameters.find("shared");
116 //user has given a template file
117 if(it != parameters.end()){
118 path = m->hasPath(it->second);
119 //if the user has not given a path then, add inputdir. else leave path alone.
120 if (path == "") { parameters["shared"] = inputDir + it->second; }
123 it = parameters.find("design");
124 //user has given a template file
125 if(it != parameters.end()){
126 path = m->hasPath(it->second);
127 //if the user has not given a path then, add inputdir. else leave path alone.
128 if (path == "") { parameters["design"] = inputDir + it->second; }
134 sharedfile = validParameter.validFile(parameters, "shared", true);
135 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
136 else if (sharedfile == "not found") {
137 //if there is a current shared file, use it
138 sharedfile = m->getSharedFile();
139 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
140 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
141 }else { m->setSharedFile(sharedfile); }
143 designfile = validParameter.validFile(parameters, "design", true);
144 if (designfile == "not open") { abort = true; designfile = ""; }
145 else if (designfile == "not found") { designfile = ""; }
146 else { m->setDesignFile(designfile); }
149 //if the user changes the output directory command factory will send this info to us in the output parameter
150 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
153 //check for optional parameter and set defaults
154 // ...at some point should added some additional type checking...
155 label = validParameter.validFile(parameters, "label", false);
156 if (label == "not found") { label = ""; }
158 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
159 else { allLines = 1; }
163 calc = validParameter.validFile(parameters, "calc", false);
164 if (calc == "not found") { calc = "sharedobserved"; }
166 if (calc == "default") { calc = "sharedobserved"; }
168 m->splitAtDash(calc, Estimators);
169 if (m->inUsersGroups("citation", Estimators)) {
170 ValidCalculators validCalc; validCalc.printCitations(Estimators);
171 //remove citation from list of calcs
172 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
175 groups = validParameter.validFile(parameters, "groups", false);
176 if (groups == "not found") { groups = ""; }
178 m->splitAtDash(groups, Groups);
180 m->setGroups(Groups);
182 string sets = validParameter.validFile(parameters, "sets", false);
183 if (sets == "not found") { sets = ""; }
185 m->splitAtDash(sets, Sets);
189 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
190 m->mothurConvert(temp, freq);
192 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
193 m->mothurConvert(temp, nIters);
195 temp = validParameter.validFile(parameters, "jumble", false); if (temp == "not found") { temp = "T"; }
196 if (m->isTrue(temp)) { jumble = true; }
197 else { jumble = false; }
203 catch(exception& e) {
204 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
208 //**********************************************************************************************************************
210 int RareFactSharedCommand::execute(){
213 if (abort == true) { if (calledHelp) { return 0; } return 2; }
216 if (designfile == "") { //fake out designMap to run with process
217 process(designMap, "");
219 designMap.readDesignMap(designfile);
221 //fill Sets - checks for "all" and for any typo groups
223 vector<string> nameSets = designMap.getNamesOfGroups();
224 util.setGroups(Sets, nameSets);
226 for (int i = 0; i < Sets.size(); i++) {
227 process(designMap, Sets[i]);
231 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
233 m->mothurOutEndLine();
234 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
235 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
236 m->mothurOutEndLine();
240 catch(exception& e) {
241 m->errorOut(e, "RareFactSharedCommand", "execute");
245 //**********************************************************************************************************************
247 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
250 vector<Display*> rDisplays;
252 InputData input(sharedfile, "sharedfile");
253 lookup = input.getSharedRAbundVectors();
254 if (lookup.size() < 2) {
255 m->mothurOut("I cannot run the command without at least 2 valid groups.");
256 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
260 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
262 vector<string> newGroups = m->getGroups();
263 if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read
264 vector<string> thisSets; thisSets.push_back(thisSet);
265 newGroups = designMap.getNamesSeqs(thisSets);
266 fileNameRoot += thisSet + ".";
269 vector<SharedRAbundVector*> subset;
270 if (thisSet == "") { subset.clear(); subset = lookup; }
271 else {//fill subset with this sets groups
273 for (int i = 0; i < lookup.size(); i++) {
274 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
275 subset.push_back(lookup[i]);
280 ValidCalculators validCalculator;
281 for (int i=0; i<Estimators.size(); i++) {
282 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
283 if (Estimators[i] == "sharedobserved") {
284 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"shared.rarefaction", "")));
285 outputNames.push_back(fileNameRoot+"shared.rarefaction"); outputTypes["sharedrarefaction"].push_back(fileNameRoot+"shared.rarefaction");
286 }else if (Estimators[i] == "sharednseqs") {
287 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+"shared.r_nseqs", "")));
288 outputNames.push_back(fileNameRoot+"shared.r_nseqs"); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+"shared.r_nseqs");
293 //if the users entered no valid calculators don't execute command
294 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
296 if (m->control_pressed) {
297 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
298 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
299 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
304 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
305 string lastLabel = subset[0]->getLabel();
306 set<string> processedLabels;
307 set<string> userLabels = labels;
309 //as long as you are not at the end of the file or done wih the lines you want
310 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
311 if (m->control_pressed) {
312 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
313 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
314 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
318 if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
319 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
320 rCurve = new Rarefact(subset, rDisplays);
321 rCurve->getSharedCurve(freq, nIters);
324 processedLabels.insert(subset[0]->getLabel());
325 userLabels.erase(subset[0]->getLabel());
328 if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
329 string saveLabel = subset[0]->getLabel();
331 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
332 lookup = input.getSharedRAbundVectors(lastLabel);
334 if (thisSet == "") { subset.clear(); subset = lookup; }
335 else {//fill subset with this sets groups
337 for (int i = 0; i < lookup.size(); i++) {
338 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
339 subset.push_back(lookup[i]);
344 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
345 rCurve = new Rarefact(subset, rDisplays);
346 rCurve->getSharedCurve(freq, nIters);
349 processedLabels.insert(subset[0]->getLabel());
350 userLabels.erase(subset[0]->getLabel());
352 //restore real lastlabel to save below
353 subset[0]->setLabel(saveLabel);
357 lastLabel = subset[0]->getLabel();
359 //get next line to process
360 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
361 lookup = input.getSharedRAbundVectors();
363 if (lookup[0] != NULL) {
364 if (thisSet == "") { subset.clear(); subset = lookup; }
365 else {//fill subset with this sets groups
367 for (int i = 0; i < lookup.size(); i++) {
368 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
369 subset.push_back(lookup[i]);
373 }else { subset.clear(); subset.push_back(NULL); }
377 if (m->control_pressed) {
378 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
379 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
383 //output error messages about any remaining user labels
384 set<string>::iterator it;
385 bool needToRun = false;
386 for (it = userLabels.begin(); it != userLabels.end(); it++) {
387 m->mothurOut("Your file does not include the label " + *it);
388 if (processedLabels.count(lastLabel) != 1) {
389 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
392 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
396 if (m->control_pressed) {
397 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
398 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
402 //run last label if you need to
403 if (needToRun == true) {
404 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
405 lookup = input.getSharedRAbundVectors(lastLabel);
407 if (thisSet == "") { subset.clear(); subset = lookup; }
408 else {//fill subset with this sets groups
410 for (int i = 0; i < lookup.size(); i++) {
411 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
412 subset.push_back(lookup[i]);
417 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
418 rCurve = new Rarefact(subset, rDisplays);
419 rCurve->getSharedCurve(freq, nIters);
421 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
424 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
429 catch(exception& e) {
430 m->errorOut(e, "RareFactSharedCommand", "process");
434 //**********************************************************************************************************************