2 * rarefactsharedcommand.cpp
5 * Created by Sarah Westcott on 1/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> RareFactSharedCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
20 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
24 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
27 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
28 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31 vector<string> myArray;
32 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
36 m->errorOut(e, "RareFactSharedCommand", "setParameters");
40 //**********************************************************************************************************************
41 string RareFactSharedCommand::getHelpString(){
43 string helpString = "";
44 ValidCalculators validCalculator;
45 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc. shared is required if there is no current sharedfile. \n";
46 helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
47 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48 helpString += "The rarefaction command should be in the following format: \n";
49 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
50 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
51 helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
52 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
53 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
54 helpString += validCalculator.printCalc("sharedrarefaction");
55 helpString += "The label parameter is used to analyze specific labels in your input.\n";
56 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
57 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
61 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 RareFactSharedCommand::RareFactSharedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["sharedrarefaction"] = tempOutNames;
73 outputTypes["sharedr_nseqs"] = tempOutNames;
76 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
80 //**********************************************************************************************************************
82 RareFactSharedCommand::RareFactSharedCommand(string option) {
84 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 vector<string> myArray = setParameters();
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
96 map<string,string>::iterator it;
98 ValidParameters validParameter;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 //initialize outputTypes
106 vector<string> tempOutNames;
107 outputTypes["sharedrarefaction"] = tempOutNames;
108 outputTypes["sharedr_nseqs"] = tempOutNames;
110 //if the user changes the input directory command factory will send this info to us in the output parameter
111 string inputDir = validParameter.validFile(parameters, "inputdir", false);
112 if (inputDir == "not found"){ inputDir = ""; }
115 it = parameters.find("shared");
116 //user has given a template file
117 if(it != parameters.end()){
118 path = m->hasPath(it->second);
119 //if the user has not given a path then, add inputdir. else leave path alone.
120 if (path == "") { parameters["shared"] = inputDir + it->second; }
123 it = parameters.find("design");
124 //user has given a template file
125 if(it != parameters.end()){
126 path = m->hasPath(it->second);
127 //if the user has not given a path then, add inputdir. else leave path alone.
128 if (path == "") { parameters["design"] = inputDir + it->second; }
134 sharedfile = validParameter.validFile(parameters, "shared", true);
135 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
136 else if (sharedfile == "not found") {
137 //if there is a current shared file, use it
138 sharedfile = m->getSharedFile();
139 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
140 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
141 }else { m->setSharedFile(sharedfile); }
143 designfile = validParameter.validFile(parameters, "design", true);
144 if (designfile == "not open") { abort = true; designfile = ""; }
145 else if (designfile == "not found") { designfile = ""; }
146 else { m->setDesignFile(designfile); }
149 //if the user changes the output directory command factory will send this info to us in the output parameter
150 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
153 //check for optional parameter and set defaults
154 // ...at some point should added some additional type checking...
155 label = validParameter.validFile(parameters, "label", false);
156 if (label == "not found") { label = ""; }
158 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
159 else { allLines = 1; }
163 calc = validParameter.validFile(parameters, "calc", false);
164 if (calc == "not found") { calc = "sharedobserved"; }
166 if (calc == "default") { calc = "sharedobserved"; }
168 m->splitAtDash(calc, Estimators);
169 if (m->inUsersGroups("citation", Estimators)) {
170 ValidCalculators validCalc; validCalc.printCitations(Estimators);
171 //remove citation from list of calcs
172 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
175 groups = validParameter.validFile(parameters, "groups", false);
176 if (groups == "not found") { groups = ""; }
178 m->splitAtDash(groups, Groups);
180 m->setGroups(Groups);
182 string sets = validParameter.validFile(parameters, "sets", false);
183 if (sets == "not found") { sets = ""; }
185 m->splitAtDash(sets, Sets);
189 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
190 m->mothurConvert(temp, freq);
192 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
193 m->mothurConvert(temp, nIters);
195 temp = validParameter.validFile(parameters, "jumble", false); if (temp == "not found") { temp = "T"; }
196 if (m->isTrue(temp)) { jumble = true; }
197 else { jumble = false; }
200 temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
201 groupMode = m->isTrue(temp);
206 catch(exception& e) {
207 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
211 //**********************************************************************************************************************
213 int RareFactSharedCommand::execute(){
216 if (abort == true) { if (calledHelp) { return 0; } return 2; }
219 if (designfile == "") { //fake out designMap to run with process
220 process(designMap, "");
222 designMap.readDesignMap(designfile);
224 //fill Sets - checks for "all" and for any typo groups
226 vector<string> nameSets = designMap.getNamesOfGroups();
227 util.setGroups(Sets, nameSets);
229 for (int i = 0; i < Sets.size(); i++) {
230 process(designMap, Sets[i]);
233 if (groupMode) { outputNames = createGroupFile(outputNames); }
236 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
238 m->mothurOutEndLine();
239 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
240 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
241 m->mothurOutEndLine();
245 catch(exception& e) {
246 m->errorOut(e, "RareFactSharedCommand", "execute");
250 //**********************************************************************************************************************
252 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
255 vector<Display*> rDisplays;
257 InputData input(sharedfile, "sharedfile");
258 lookup = input.getSharedRAbundVectors();
259 if (lookup.size() < 2) {
260 m->mothurOut("I cannot run the command without at least 2 valid groups.");
261 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
265 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
267 vector<string> newGroups = m->getGroups();
268 if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read
269 vector<string> thisSets; thisSets.push_back(thisSet);
270 newGroups = designMap.getNamesSeqs(thisSets);
271 fileNameRoot += thisSet + ".";
274 vector<SharedRAbundVector*> subset;
275 if (thisSet == "") { subset.clear(); subset = lookup; }
276 else {//fill subset with this sets groups
278 for (int i = 0; i < lookup.size(); i++) {
279 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
280 subset.push_back(lookup[i]);
285 ValidCalculators validCalculator;
286 for (int i=0; i<Estimators.size(); i++) {
287 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
288 if (Estimators[i] == "sharedobserved") {
289 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"shared.rarefaction", "")));
290 outputNames.push_back(fileNameRoot+"shared.rarefaction"); outputTypes["sharedrarefaction"].push_back(fileNameRoot+"shared.rarefaction");
291 }else if (Estimators[i] == "sharednseqs") {
292 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+"shared.r_nseqs", "")));
293 outputNames.push_back(fileNameRoot+"shared.r_nseqs"); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+"shared.r_nseqs");
296 file2Group[outputNames.size()-1] = thisSet;
299 //if the users entered no valid calculators don't execute command
300 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
302 if (m->control_pressed) {
303 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
304 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
305 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
310 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
311 string lastLabel = subset[0]->getLabel();
312 set<string> processedLabels;
313 set<string> userLabels = labels;
315 //as long as you are not at the end of the file or done wih the lines you want
316 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
317 if (m->control_pressed) {
318 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
319 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
320 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
324 if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
325 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
326 rCurve = new Rarefact(subset, rDisplays);
327 rCurve->getSharedCurve(freq, nIters);
330 processedLabels.insert(subset[0]->getLabel());
331 userLabels.erase(subset[0]->getLabel());
334 if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
335 string saveLabel = subset[0]->getLabel();
337 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
338 lookup = input.getSharedRAbundVectors(lastLabel);
340 if (thisSet == "") { subset.clear(); subset = lookup; }
341 else {//fill subset with this sets groups
343 for (int i = 0; i < lookup.size(); i++) {
344 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
345 subset.push_back(lookup[i]);
350 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
351 rCurve = new Rarefact(subset, rDisplays);
352 rCurve->getSharedCurve(freq, nIters);
355 processedLabels.insert(subset[0]->getLabel());
356 userLabels.erase(subset[0]->getLabel());
358 //restore real lastlabel to save below
359 subset[0]->setLabel(saveLabel);
363 lastLabel = subset[0]->getLabel();
365 //get next line to process
366 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
367 lookup = input.getSharedRAbundVectors();
369 if (lookup[0] != NULL) {
370 if (thisSet == "") { subset.clear(); subset = lookup; }
371 else {//fill subset with this sets groups
373 for (int i = 0; i < lookup.size(); i++) {
374 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
375 subset.push_back(lookup[i]);
379 }else { subset.clear(); subset.push_back(NULL); }
383 if (m->control_pressed) {
384 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
385 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
389 //output error messages about any remaining user labels
390 set<string>::iterator it;
391 bool needToRun = false;
392 for (it = userLabels.begin(); it != userLabels.end(); it++) {
393 m->mothurOut("Your file does not include the label " + *it);
394 if (processedLabels.count(lastLabel) != 1) {
395 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
398 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
402 if (m->control_pressed) {
403 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
404 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
408 //run last label if you need to
409 if (needToRun == true) {
410 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
411 lookup = input.getSharedRAbundVectors(lastLabel);
413 if (thisSet == "") { subset.clear(); subset = lookup; }
414 else {//fill subset with this sets groups
416 for (int i = 0; i < lookup.size(); i++) {
417 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
418 subset.push_back(lookup[i]);
423 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
424 rCurve = new Rarefact(subset, rDisplays);
425 rCurve->getSharedCurve(freq, nIters);
427 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
430 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
435 catch(exception& e) {
436 m->errorOut(e, "RareFactSharedCommand", "process");
440 //**********************************************************************************************************************
441 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
444 vector<string> newFileNames;
446 //find different types of files
447 map<string, map<string, string> > typesFiles;
448 map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
449 vector<string> groupNames;
450 for (int i = 0; i < outputNames.size(); i++) {
452 string extension = m->getExtension(outputNames[i]);
453 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
454 m->mothurRemove(combineFileName); //remove old file
457 m->openInputFile(outputNames[i], in);
459 string labels = m->getline(in);
461 istringstream iss (labels,istringstream::in);
462 string newLabel = ""; vector<string> theseLabels;
463 while(!iss.eof()) { iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
464 vector< vector<string> > allLabels;
465 vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
466 for (int j = 1; j < theseLabels.size()-1; j++) {
467 if (theseLabels[j+1] == "lci") {
468 thisSet.push_back(theseLabels[j]);
469 thisSet.push_back(theseLabels[j+1]);
470 thisSet.push_back(theseLabels[j+2]);
472 }else{ //no lci or hci for this calc.
473 thisSet.push_back(theseLabels[j]);
475 allLabels.push_back(thisSet);
478 fileLabels[combineFileName] = allLabels;
480 map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
481 if (itfind != typesFiles.end()) {
482 (itfind->second)[outputNames[i]] = file2Group[i];
484 map<string, string> temp;
485 temp[outputNames[i]] = file2Group[i];
486 typesFiles[extension] = temp;
488 if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); }
491 //for each type create a combo file
493 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
496 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
497 m->openOutputFileAppend(combineFileName, out);
498 newFileNames.push_back(combineFileName);
499 map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
500 set<int> numSampledSet;
502 //open each type summary file
503 map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
505 for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
507 string thisfilename = itFileNameGroup->first;
508 string group = itFileNameGroup->second;
511 m->openInputFile(thisfilename, temp);
513 //read through first line - labels
514 m->getline(temp); m->gobble(temp);
516 map<int, vector< vector<string> > > thisFilesLines;
519 temp >> numSampled; m->gobble(temp);
521 vector< vector<string> > theseReads;
522 vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
523 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
524 vector<string> reads;
526 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
527 temp >> next; m->gobble(temp);
528 reads.push_back(next);
530 theseReads.push_back(reads);
532 thisFilesLines[numSampled] = theseReads;
535 numSampledSet.insert(numSampled);
538 files[group] = thisFilesLines;
540 //save longest file for below
541 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
544 m->mothurRemove(thisfilename);
547 //output new labels line
548 out << fileLabels[combineFileName][0][0] << '\t';
549 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
550 for (int n = 0; n < groupNames.size(); n++) { // for each group
551 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
552 out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
559 for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
561 out << (*itNumSampled) << '\t';
563 if (m->control_pressed) { break; }
565 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
566 //grab data for each group
567 for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
569 string group = itFileNameGroup->first;
571 map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
572 if (itLine != files[group].end()) {
573 for (int l = 0; l < (itLine->second)[k].size(); l++) {
574 out << (itLine->second)[k][l] << '\t';
578 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) {
589 //return combine file name
593 catch(exception& e) {
594 m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
598 //**********************************************************************************************************************