2 * rarefactsharedcommand.cpp
5 * Created by Sarah Westcott on 1/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
14 #include "subsample.h"
16 //**********************************************************************************************************************
17 vector<string> RareFactSharedCommand::setParameters(){
19 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared);
20 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pdesign);
21 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "","",true,false,true); parameters.push_back(pcalc);
25 CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(psubsampleiters);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
27 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pjumble);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
29 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
30 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
39 m->errorOut(e, "RareFactSharedCommand", "setParameters");
43 //**********************************************************************************************************************
44 string RareFactSharedCommand::getHelpString(){
46 string helpString = "";
47 ValidCalculators validCalculator;
48 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc. shared is required if there is no current sharedfile. \n";
49 helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
50 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
51 helpString += "The rarefaction command should be in the following format: \n";
52 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
53 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
54 helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
55 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
56 helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n";
57 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
58 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
59 helpString += validCalculator.printCalc("sharedrarefaction");
60 helpString += "The label parameter is used to analyze specific labels in your input.\n";
61 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
62 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
66 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
70 //**********************************************************************************************************************
71 string RareFactSharedCommand::getOutputPattern(string type) {
75 if (type == "sharedrarefaction") { pattern = "[filename],shared.rarefaction"; }
76 else if (type == "sharedr_nseqs") { pattern = "[filename],shared.r_nseqs"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
82 m->errorOut(e, "RareFactSharedCommand", "getOutputPattern");
86 //**********************************************************************************************************************
87 RareFactSharedCommand::RareFactSharedCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["sharedrarefaction"] = tempOutNames;
93 outputTypes["sharedr_nseqs"] = tempOutNames;
96 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
100 //**********************************************************************************************************************
102 RareFactSharedCommand::RareFactSharedCommand(string option) {
104 abort = false; calledHelp = false;
107 //allow user to run help
108 if(option == "help") { help(); abort = true; calledHelp = true; }
109 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
112 vector<string> myArray = setParameters();
114 OptionParser parser(option);
115 map<string,string> parameters = parser.getParameters();
116 map<string,string>::iterator it;
118 ValidParameters validParameter;
120 //check to make sure all parameters are valid for command
121 for (it = parameters.begin(); it != parameters.end(); it++) {
122 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
125 //initialize outputTypes
126 vector<string> tempOutNames;
127 outputTypes["sharedrarefaction"] = tempOutNames;
128 outputTypes["sharedr_nseqs"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("shared");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["shared"] = inputDir + it->second; }
143 it = parameters.find("design");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["design"] = inputDir + it->second; }
154 sharedfile = validParameter.validFile(parameters, "shared", true);
155 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
156 else if (sharedfile == "not found") {
157 //if there is a current shared file, use it
158 sharedfile = m->getSharedFile();
159 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
160 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
161 }else { m->setSharedFile(sharedfile); }
163 designfile = validParameter.validFile(parameters, "design", true);
164 if (designfile == "not open") { abort = true; designfile = ""; }
165 else if (designfile == "not found") { designfile = ""; }
166 else { m->setDesignFile(designfile); }
169 //if the user changes the output directory command factory will send this info to us in the output parameter
170 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
173 //check for optional parameter and set defaults
174 // ...at some point should added some additional type checking...
175 label = validParameter.validFile(parameters, "label", false);
176 if (label == "not found") { label = ""; }
178 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
179 else { allLines = 1; }
183 calc = validParameter.validFile(parameters, "calc", false);
184 if (calc == "not found") { calc = "sharedobserved"; }
186 if (calc == "default") { calc = "sharedobserved"; }
188 m->splitAtDash(calc, Estimators);
189 if (m->inUsersGroups("citation", Estimators)) {
190 ValidCalculators validCalc; validCalc.printCitations(Estimators);
191 //remove citation from list of calcs
192 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
195 groups = validParameter.validFile(parameters, "groups", false);
196 if (groups == "not found") { groups = ""; }
198 m->splitAtDash(groups, Groups);
200 m->setGroups(Groups);
202 string sets = validParameter.validFile(parameters, "sets", false);
203 if (sets == "not found") { sets = ""; }
205 m->splitAtDash(sets, Sets);
209 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
210 m->mothurConvert(temp, freq);
212 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
213 m->mothurConvert(temp, nIters);
215 temp = validParameter.validFile(parameters, "jumble", false); if (temp == "not found") { temp = "T"; }
216 if (m->isTrue(temp)) { jumble = true; }
217 else { jumble = false; }
220 temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
221 groupMode = m->isTrue(temp);
223 temp = validParameter.validFile(parameters, "subsampleiters", false); if (temp == "not found") { temp = "1000"; }
224 m->mothurConvert(temp, iters);
226 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
227 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
229 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
230 else { subsample = false; }
233 if (subsample == false) { iters = 1; }
238 catch(exception& e) {
239 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
243 //**********************************************************************************************************************
245 int RareFactSharedCommand::execute(){
248 if (abort == true) { if (calledHelp) { return 0; } return 2; }
251 if (designfile == "") { //fake out designMap to run with process
252 process(designMap, "");
254 designMap.readDesignMap(designfile);
256 //fill Sets - checks for "all" and for any typo groups
258 vector<string> nameSets = designMap.getNamesOfGroups();
259 util.setGroups(Sets, nameSets);
261 for (int i = 0; i < Sets.size(); i++) {
262 process(designMap, Sets[i]);
265 if (groupMode) { outputNames = createGroupFile(outputNames); }
268 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
270 m->mothurOutEndLine();
271 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
272 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
273 m->mothurOutEndLine();
277 catch(exception& e) {
278 m->errorOut(e, "RareFactSharedCommand", "execute");
282 //**********************************************************************************************************************
284 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
287 vector<Display*> rDisplays;
289 InputData input(sharedfile, "sharedfile");
290 lookup = input.getSharedRAbundVectors();
291 if (lookup.size() < 2) {
292 m->mothurOut("I cannot run the command without at least 2 valid groups.");
293 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
297 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
299 vector<string> newGroups = m->getGroups();
300 if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read
301 vector<string> thisSets; thisSets.push_back(thisSet);
302 newGroups = designMap.getNamesSeqs(thisSets);
303 fileNameRoot += thisSet + ".";
306 vector<SharedRAbundVector*> subset;
307 if (thisSet == "") { subset.clear(); subset = lookup; }
308 else {//fill subset with this sets groups
310 for (int i = 0; i < lookup.size(); i++) {
311 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
312 subset.push_back(lookup[i]);
317 /******************************************************/
319 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
320 subsampleSize = subset[0]->getNumSeqs();
321 for (int i = 1; i < subset.size(); i++) {
322 int thisSize = subset[i]->getNumSeqs();
324 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
328 vector<SharedRAbundVector*> temp;
329 for (int i = 0; i < subset.size(); i++) {
330 if (subset[i]->getNumSeqs() < subsampleSize) {
331 m->mothurOut(subset[i]->getGroup() + " contains " + toString(subset[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
334 newGroups.push_back(subset[i]->getGroup());
335 temp.push_back(subset[i]);
341 if (subset.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
343 /******************************************************/
345 map<string, string> variables;
346 variables["[filename]"] = fileNameRoot;
347 ValidCalculators validCalculator;
348 for (int i=0; i<Estimators.size(); i++) {
349 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
350 if (Estimators[i] == "sharedobserved") {
351 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(getOutputFileName("sharedrarefaction",variables), "")));
352 outputNames.push_back(getOutputFileName("sharedrarefaction",variables)); outputTypes["sharedrarefaction"].push_back(getOutputFileName("sharedrarefaction",variables));
353 }else if (Estimators[i] == "sharednseqs") {
354 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(getOutputFileName("sharedr_nseqs",variables), "")));
355 outputNames.push_back(getOutputFileName("sharedr_nseqs",variables)); outputTypes["sharedr_nseqs"].push_back(getOutputFileName("sharedr_nseqs",variables));
358 file2Group[outputNames.size()-1] = thisSet;
361 //if the users entered no valid calculators don't execute command
362 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
364 if (m->control_pressed) {
365 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
366 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
367 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
372 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
373 string lastLabel = subset[0]->getLabel();
374 set<string> processedLabels;
375 set<string> userLabels = labels;
377 //as long as you are not at the end of the file or done wih the lines you want
378 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
379 if (m->control_pressed) {
380 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
381 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
382 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386 if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
387 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
388 rCurve = new Rarefact(subset, rDisplays);
389 rCurve->getSharedCurve(freq, nIters);
392 if (subsample) { subsampleLookup(subset, fileNameRoot); }
394 processedLabels.insert(subset[0]->getLabel());
395 userLabels.erase(subset[0]->getLabel());
398 if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
399 string saveLabel = subset[0]->getLabel();
401 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
402 lookup = input.getSharedRAbundVectors(lastLabel);
404 if (thisSet == "") { subset.clear(); subset = lookup; }
405 else {//fill subset with this sets groups
407 for (int i = 0; i < lookup.size(); i++) {
408 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
409 subset.push_back(lookup[i]);
414 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
415 rCurve = new Rarefact(subset, rDisplays);
416 rCurve->getSharedCurve(freq, nIters);
419 if (subsample) { subsampleLookup(subset, fileNameRoot); }
421 processedLabels.insert(subset[0]->getLabel());
422 userLabels.erase(subset[0]->getLabel());
424 //restore real lastlabel to save below
425 subset[0]->setLabel(saveLabel);
429 lastLabel = subset[0]->getLabel();
431 //get next line to process
432 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
433 lookup = input.getSharedRAbundVectors();
435 if (lookup[0] != NULL) {
436 if (thisSet == "") { subset.clear(); subset = lookup; }
437 else {//fill subset with this sets groups
439 for (int i = 0; i < lookup.size(); i++) {
440 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
441 subset.push_back(lookup[i]);
445 }else { subset.clear(); subset.push_back(NULL); }
449 if (m->control_pressed) {
450 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
451 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
455 //output error messages about any remaining user labels
456 set<string>::iterator it;
457 bool needToRun = false;
458 for (it = userLabels.begin(); it != userLabels.end(); it++) {
459 m->mothurOut("Your file does not include the label " + *it);
460 if (processedLabels.count(lastLabel) != 1) {
461 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
464 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
468 if (m->control_pressed) {
469 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
470 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
474 //run last label if you need to
475 if (needToRun == true) {
476 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
477 lookup = input.getSharedRAbundVectors(lastLabel);
479 if (thisSet == "") { subset.clear(); subset = lookup; }
480 else {//fill subset with this sets groups
482 for (int i = 0; i < lookup.size(); i++) {
483 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
484 subset.push_back(lookup[i]);
489 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
490 rCurve = new Rarefact(subset, rDisplays);
491 rCurve->getSharedCurve(freq, nIters);
494 if (subsample) { subsampleLookup(subset, fileNameRoot); }
496 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
499 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
504 catch(exception& e) {
505 m->errorOut(e, "RareFactSharedCommand", "process");
509 //**********************************************************************************************************************
510 int RareFactSharedCommand::subsampleLookup(vector<SharedRAbundVector*>& thisLookup, string fileNameRoot) {
513 map<string, vector<string> > filenames;
514 for (int thisIter = 0; thisIter < iters; thisIter++) {
516 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
518 //we want the summary results for the whole dataset, then the subsampling
520 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
522 //make copy of lookup so we don't get access violations
523 vector<SharedRAbundVector*> newLookup;
524 for (int k = 0; k < thisItersLookup.size(); k++) {
525 SharedRAbundVector* temp = new SharedRAbundVector();
526 temp->setLabel(thisItersLookup[k]->getLabel());
527 temp->setGroup(thisItersLookup[k]->getGroup());
528 newLookup.push_back(temp);
532 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
533 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
534 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
537 tempLabels = sample.getSample(newLookup, subsampleSize);
538 thisItersLookup = newLookup;
542 vector<Display*> rDisplays;
544 string thisfileNameRoot = fileNameRoot + toString(thisIter);
546 map<string, string> variables;
547 variables["[filename]"] = thisfileNameRoot;
549 ValidCalculators validCalculator;
550 for (int i=0; i<Estimators.size(); i++) {
551 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
552 if (Estimators[i] == "sharedobserved") {
553 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(getOutputFileName("sharedrarefaction",variables), "")));
554 filenames["sharedrarefaction"].push_back(getOutputFileName("sharedrarefaction",variables));
555 }else if (Estimators[i] == "sharednseqs") {
556 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(getOutputFileName("sharedr_nseqs",variables), "")));
557 filenames["sharedr_nseqs"].push_back(getOutputFileName("sharedr_nseqs",variables));
562 rCurve = new Rarefact(thisItersLookup, rDisplays);
563 rCurve->getSharedCurve(freq, nIters);
567 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
568 thisItersLookup.clear();
569 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
572 //create std and ave outputs
573 vector< vector< vector< double > > > results; //iter -> numSampled -> data
574 for (map<string, vector<string> >::iterator it = filenames.begin(); it != filenames.end(); it++) {
575 vector<string> thisTypesFiles = it->second;
576 vector<string> columnHeaders;
577 for (int i = 0; i < thisTypesFiles.size(); i++) {
579 m->openInputFile(thisTypesFiles[i], in);
581 string headers = m->getline(in); m->gobble(in);
582 columnHeaders = m->splitWhiteSpace(headers);
583 int numCols = columnHeaders.size();
585 vector<vector<double> > thisFilesLines;
587 if (m->control_pressed) { break; }
588 vector<double> data; data.resize(numCols, 0);
589 //read numSampled line
590 for (int j = 0; j < numCols; j++) { in >> data[j]; m->gobble(in); }
591 thisFilesLines.push_back(data);
594 results.push_back(thisFilesLines);
595 m->mothurRemove(thisTypesFiles[i]);
598 if (!m->control_pressed) {
600 map<string, string> variables; variables["[filename]"] = fileNameRoot + "ave-std." + thisLookup[0]->getLabel() + ".";
602 string outputFile = getOutputFileName(it->first,variables);
604 m->openOutputFile(outputFile, out);
605 outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile);
607 out << columnHeaders[0] << '\t' << "method\t";
608 for (int i = 1; i < columnHeaders.size(); i++) { out << columnHeaders[i] << '\t'; }
611 vector< vector<double> > aveResults; aveResults.resize(results[0].size());
612 for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); }
614 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
615 for (int i = 0; i < aveResults.size(); i++) { //initialize sums to zero.
616 aveResults[i][0] = results[thisIter][i][0];
617 for (int j = 1; j < aveResults[i].size(); j++) {
618 aveResults[i][j] += results[thisIter][i][j];
623 for (int i = 0; i < aveResults.size(); i++) { //finds average.
624 for (int j = 1; j < aveResults[i].size(); j++) {
625 aveResults[i][j] /= (float) iters;
630 vector< vector<double> > stdResults; stdResults.resize(results[0].size());
631 for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); }
633 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
634 for (int i = 0; i < stdResults.size(); i++) {
635 stdResults[i][0] = aveResults[i][0];
636 for (int j = 1; j < stdResults[i].size(); j++) {
637 stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j]));
642 for (int i = 0; i < stdResults.size(); i++) { //finds average.
643 out << aveResults[i][0] << '\t' << "ave\t";
644 for (int j = 1; j < aveResults[i].size(); j++) { out << aveResults[i][j] << '\t'; }
646 out << stdResults[i][0] << '\t' << "std\t";
647 for (int j = 1; j < stdResults[i].size(); j++) {
648 stdResults[i][j] /= (float) iters;
649 stdResults[i][j] = sqrt(stdResults[i][j]);
650 out << stdResults[i][j] << '\t';
661 catch(exception& e) {
662 m->errorOut(e, "RareFactSharedCommand", "subsample");
666 //**********************************************************************************************************************
667 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
670 vector<string> newFileNames;
672 //find different types of files
673 map<string, map<string, string> > typesFiles;
674 map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
675 vector<string> groupNames;
676 for (int i = 0; i < outputNames.size(); i++) {
678 string extension = m->getExtension(outputNames[i]);
679 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
680 m->mothurRemove(combineFileName); //remove old file
683 m->openInputFile(outputNames[i], in);
685 string labels = m->getline(in);
687 istringstream iss (labels,istringstream::in);
688 string newLabel = ""; vector<string> theseLabels;
689 while(!iss.eof()) { iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
690 vector< vector<string> > allLabels;
691 vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
692 for (int j = 1; j < theseLabels.size()-1; j++) {
693 if (theseLabels[j+1] == "lci") {
694 thisSet.push_back(theseLabels[j]);
695 thisSet.push_back(theseLabels[j+1]);
696 thisSet.push_back(theseLabels[j+2]);
698 }else{ //no lci or hci for this calc.
699 thisSet.push_back(theseLabels[j]);
701 allLabels.push_back(thisSet);
704 fileLabels[combineFileName] = allLabels;
706 map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
707 if (itfind != typesFiles.end()) {
708 (itfind->second)[outputNames[i]] = file2Group[i];
710 map<string, string> temp;
711 temp[outputNames[i]] = file2Group[i];
712 typesFiles[extension] = temp;
714 if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); }
717 //for each type create a combo file
719 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
722 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
723 m->openOutputFileAppend(combineFileName, out);
724 newFileNames.push_back(combineFileName);
725 map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
726 set<int> numSampledSet;
728 //open each type summary file
729 map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
731 for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
733 string thisfilename = itFileNameGroup->first;
734 string group = itFileNameGroup->second;
737 m->openInputFile(thisfilename, temp);
739 //read through first line - labels
740 m->getline(temp); m->gobble(temp);
742 map<int, vector< vector<string> > > thisFilesLines;
745 temp >> numSampled; m->gobble(temp);
747 vector< vector<string> > theseReads;
748 vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
749 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
750 vector<string> reads;
752 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
753 temp >> next; m->gobble(temp);
754 reads.push_back(next);
756 theseReads.push_back(reads);
758 thisFilesLines[numSampled] = theseReads;
761 numSampledSet.insert(numSampled);
764 files[group] = thisFilesLines;
766 //save longest file for below
767 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
770 m->mothurRemove(thisfilename);
773 //output new labels line
774 out << fileLabels[combineFileName][0][0] << '\t';
775 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
776 for (int n = 0; n < groupNames.size(); n++) { // for each group
777 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
778 out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
785 for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
787 out << (*itNumSampled) << '\t';
789 if (m->control_pressed) { break; }
791 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
792 //grab data for each group
793 for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
795 string group = itFileNameGroup->first;
797 map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
798 if (itLine != files[group].end()) {
799 for (int l = 0; l < (itLine->second)[k].size(); l++) {
800 out << (itLine->second)[k][l] << '\t';
804 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) {
815 //return combine file name
819 catch(exception& e) {
820 m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
824 //**********************************************************************************************************************