2 * rarefactsharedcommand.cpp
5 * Created by Sarah Westcott on 1/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> RareFactSharedCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
20 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
24 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
27 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
28 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31 vector<string> myArray;
32 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
36 m->errorOut(e, "RareFactSharedCommand", "setParameters");
40 //**********************************************************************************************************************
41 string RareFactSharedCommand::getHelpString(){
43 string helpString = "";
44 ValidCalculators validCalculator;
45 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc. shared is required if there is no current sharedfile. \n";
46 helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
47 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48 helpString += "The rarefaction command should be in the following format: \n";
49 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
50 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
51 helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
52 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
53 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
54 helpString += validCalculator.printCalc("sharedrarefaction");
55 helpString += "The label parameter is used to analyze specific labels in your input.\n";
56 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
57 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
61 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
65 //**********************************************************************************************************************
66 string RareFactSharedCommand::getOutputFileNameTag(string type, string inputName=""){
68 string outputFileName = "";
69 map<string, vector<string> >::iterator it;
71 //is this a type this command creates
72 it = outputTypes.find(type);
73 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75 if (type == "sharedrarefaction") { outputFileName = "shared.rarefaction"; }
76 else if (type == "sharedr_nseqs") { outputFileName = "shared.r_nseqs"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
79 return outputFileName;
82 m->errorOut(e, "RareFactSharedCommand", "getOutputFileNameTag");
86 //**********************************************************************************************************************
87 RareFactSharedCommand::RareFactSharedCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["sharedrarefaction"] = tempOutNames;
93 outputTypes["sharedr_nseqs"] = tempOutNames;
96 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
100 //**********************************************************************************************************************
102 RareFactSharedCommand::RareFactSharedCommand(string option) {
104 abort = false; calledHelp = false;
107 //allow user to run help
108 if(option == "help") { help(); abort = true; calledHelp = true; }
109 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
112 vector<string> myArray = setParameters();
114 OptionParser parser(option);
115 map<string,string> parameters = parser.getParameters();
116 map<string,string>::iterator it;
118 ValidParameters validParameter;
120 //check to make sure all parameters are valid for command
121 for (it = parameters.begin(); it != parameters.end(); it++) {
122 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
125 //initialize outputTypes
126 vector<string> tempOutNames;
127 outputTypes["sharedrarefaction"] = tempOutNames;
128 outputTypes["sharedr_nseqs"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("shared");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["shared"] = inputDir + it->second; }
143 it = parameters.find("design");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["design"] = inputDir + it->second; }
154 sharedfile = validParameter.validFile(parameters, "shared", true);
155 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
156 else if (sharedfile == "not found") {
157 //if there is a current shared file, use it
158 sharedfile = m->getSharedFile();
159 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
160 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
161 }else { m->setSharedFile(sharedfile); }
163 designfile = validParameter.validFile(parameters, "design", true);
164 if (designfile == "not open") { abort = true; designfile = ""; }
165 else if (designfile == "not found") { designfile = ""; }
166 else { m->setDesignFile(designfile); }
169 //if the user changes the output directory command factory will send this info to us in the output parameter
170 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
173 //check for optional parameter and set defaults
174 // ...at some point should added some additional type checking...
175 label = validParameter.validFile(parameters, "label", false);
176 if (label == "not found") { label = ""; }
178 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
179 else { allLines = 1; }
183 calc = validParameter.validFile(parameters, "calc", false);
184 if (calc == "not found") { calc = "sharedobserved"; }
186 if (calc == "default") { calc = "sharedobserved"; }
188 m->splitAtDash(calc, Estimators);
189 if (m->inUsersGroups("citation", Estimators)) {
190 ValidCalculators validCalc; validCalc.printCitations(Estimators);
191 //remove citation from list of calcs
192 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
195 groups = validParameter.validFile(parameters, "groups", false);
196 if (groups == "not found") { groups = ""; }
198 m->splitAtDash(groups, Groups);
200 m->setGroups(Groups);
202 string sets = validParameter.validFile(parameters, "sets", false);
203 if (sets == "not found") { sets = ""; }
205 m->splitAtDash(sets, Sets);
209 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
210 m->mothurConvert(temp, freq);
212 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
213 m->mothurConvert(temp, nIters);
215 temp = validParameter.validFile(parameters, "jumble", false); if (temp == "not found") { temp = "T"; }
216 if (m->isTrue(temp)) { jumble = true; }
217 else { jumble = false; }
220 temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
221 groupMode = m->isTrue(temp);
226 catch(exception& e) {
227 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
231 //**********************************************************************************************************************
233 int RareFactSharedCommand::execute(){
236 if (abort == true) { if (calledHelp) { return 0; } return 2; }
239 if (designfile == "") { //fake out designMap to run with process
240 process(designMap, "");
242 designMap.readDesignMap(designfile);
244 //fill Sets - checks for "all" and for any typo groups
246 vector<string> nameSets = designMap.getNamesOfGroups();
247 util.setGroups(Sets, nameSets);
249 for (int i = 0; i < Sets.size(); i++) {
250 process(designMap, Sets[i]);
253 if (groupMode) { outputNames = createGroupFile(outputNames); }
256 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
258 m->mothurOutEndLine();
259 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
260 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
261 m->mothurOutEndLine();
265 catch(exception& e) {
266 m->errorOut(e, "RareFactSharedCommand", "execute");
270 //**********************************************************************************************************************
272 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
275 vector<Display*> rDisplays;
277 InputData input(sharedfile, "sharedfile");
278 lookup = input.getSharedRAbundVectors();
279 if (lookup.size() < 2) {
280 m->mothurOut("I cannot run the command without at least 2 valid groups.");
281 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
285 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
287 vector<string> newGroups = m->getGroups();
288 if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read
289 vector<string> thisSets; thisSets.push_back(thisSet);
290 newGroups = designMap.getNamesSeqs(thisSets);
291 fileNameRoot += thisSet + ".";
294 vector<SharedRAbundVector*> subset;
295 if (thisSet == "") { subset.clear(); subset = lookup; }
296 else {//fill subset with this sets groups
298 for (int i = 0; i < lookup.size(); i++) {
299 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
300 subset.push_back(lookup[i]);
305 ValidCalculators validCalculator;
306 for (int i=0; i<Estimators.size(); i++) {
307 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
308 if (Estimators[i] == "sharedobserved") {
309 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
310 outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction")); outputTypes["sharedrarefaction"].push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction"));
311 }else if (Estimators[i] == "sharednseqs") {
312 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
313 outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs")); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
316 file2Group[outputNames.size()-1] = thisSet;
319 //if the users entered no valid calculators don't execute command
320 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
322 if (m->control_pressed) {
323 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
324 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
325 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
330 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
331 string lastLabel = subset[0]->getLabel();
332 set<string> processedLabels;
333 set<string> userLabels = labels;
335 //as long as you are not at the end of the file or done wih the lines you want
336 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
337 if (m->control_pressed) {
338 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
339 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
340 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
344 if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
345 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
346 rCurve = new Rarefact(subset, rDisplays);
347 rCurve->getSharedCurve(freq, nIters);
350 processedLabels.insert(subset[0]->getLabel());
351 userLabels.erase(subset[0]->getLabel());
354 if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
355 string saveLabel = subset[0]->getLabel();
357 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
358 lookup = input.getSharedRAbundVectors(lastLabel);
360 if (thisSet == "") { subset.clear(); subset = lookup; }
361 else {//fill subset with this sets groups
363 for (int i = 0; i < lookup.size(); i++) {
364 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
365 subset.push_back(lookup[i]);
370 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
371 rCurve = new Rarefact(subset, rDisplays);
372 rCurve->getSharedCurve(freq, nIters);
375 processedLabels.insert(subset[0]->getLabel());
376 userLabels.erase(subset[0]->getLabel());
378 //restore real lastlabel to save below
379 subset[0]->setLabel(saveLabel);
383 lastLabel = subset[0]->getLabel();
385 //get next line to process
386 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
387 lookup = input.getSharedRAbundVectors();
389 if (lookup[0] != NULL) {
390 if (thisSet == "") { subset.clear(); subset = lookup; }
391 else {//fill subset with this sets groups
393 for (int i = 0; i < lookup.size(); i++) {
394 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
395 subset.push_back(lookup[i]);
399 }else { subset.clear(); subset.push_back(NULL); }
403 if (m->control_pressed) {
404 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
405 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
409 //output error messages about any remaining user labels
410 set<string>::iterator it;
411 bool needToRun = false;
412 for (it = userLabels.begin(); it != userLabels.end(); it++) {
413 m->mothurOut("Your file does not include the label " + *it);
414 if (processedLabels.count(lastLabel) != 1) {
415 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
418 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
422 if (m->control_pressed) {
423 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
424 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
428 //run last label if you need to
429 if (needToRun == true) {
430 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
431 lookup = input.getSharedRAbundVectors(lastLabel);
433 if (thisSet == "") { subset.clear(); subset = lookup; }
434 else {//fill subset with this sets groups
436 for (int i = 0; i < lookup.size(); i++) {
437 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
438 subset.push_back(lookup[i]);
443 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
444 rCurve = new Rarefact(subset, rDisplays);
445 rCurve->getSharedCurve(freq, nIters);
447 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
450 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
455 catch(exception& e) {
456 m->errorOut(e, "RareFactSharedCommand", "process");
460 //**********************************************************************************************************************
461 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
464 vector<string> newFileNames;
466 //find different types of files
467 map<string, map<string, string> > typesFiles;
468 map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
469 vector<string> groupNames;
470 for (int i = 0; i < outputNames.size(); i++) {
472 string extension = m->getExtension(outputNames[i]);
473 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
474 m->mothurRemove(combineFileName); //remove old file
477 m->openInputFile(outputNames[i], in);
479 string labels = m->getline(in);
481 istringstream iss (labels,istringstream::in);
482 string newLabel = ""; vector<string> theseLabels;
483 while(!iss.eof()) { iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
484 vector< vector<string> > allLabels;
485 vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
486 for (int j = 1; j < theseLabels.size()-1; j++) {
487 if (theseLabels[j+1] == "lci") {
488 thisSet.push_back(theseLabels[j]);
489 thisSet.push_back(theseLabels[j+1]);
490 thisSet.push_back(theseLabels[j+2]);
492 }else{ //no lci or hci for this calc.
493 thisSet.push_back(theseLabels[j]);
495 allLabels.push_back(thisSet);
498 fileLabels[combineFileName] = allLabels;
500 map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
501 if (itfind != typesFiles.end()) {
502 (itfind->second)[outputNames[i]] = file2Group[i];
504 map<string, string> temp;
505 temp[outputNames[i]] = file2Group[i];
506 typesFiles[extension] = temp;
508 if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); }
511 //for each type create a combo file
513 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
516 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
517 m->openOutputFileAppend(combineFileName, out);
518 newFileNames.push_back(combineFileName);
519 map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
520 set<int> numSampledSet;
522 //open each type summary file
523 map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
525 for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
527 string thisfilename = itFileNameGroup->first;
528 string group = itFileNameGroup->second;
531 m->openInputFile(thisfilename, temp);
533 //read through first line - labels
534 m->getline(temp); m->gobble(temp);
536 map<int, vector< vector<string> > > thisFilesLines;
539 temp >> numSampled; m->gobble(temp);
541 vector< vector<string> > theseReads;
542 vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
543 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
544 vector<string> reads;
546 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
547 temp >> next; m->gobble(temp);
548 reads.push_back(next);
550 theseReads.push_back(reads);
552 thisFilesLines[numSampled] = theseReads;
555 numSampledSet.insert(numSampled);
558 files[group] = thisFilesLines;
560 //save longest file for below
561 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
564 m->mothurRemove(thisfilename);
567 //output new labels line
568 out << fileLabels[combineFileName][0][0] << '\t';
569 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
570 for (int n = 0; n < groupNames.size(); n++) { // for each group
571 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
572 out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
579 for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
581 out << (*itNumSampled) << '\t';
583 if (m->control_pressed) { break; }
585 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
586 //grab data for each group
587 for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
589 string group = itFileNameGroup->first;
591 map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
592 if (itLine != files[group].end()) {
593 for (int l = 0; l < (itLine->second)[k].size(); l++) {
594 out << (itLine->second)[k][l] << '\t';
598 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) {
609 //return combine file name
613 catch(exception& e) {
614 m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
618 //**********************************************************************************************************************