2 * rarefactsharedcommand.cpp
5 * Created by Sarah Westcott on 1/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
14 #include "subsample.h"
16 //**********************************************************************************************************************
17 vector<string> RareFactSharedCommand::setParameters(){
19 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
20 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
21 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
25 CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(psubsampleiters);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
29 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
30 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
39 m->errorOut(e, "RareFactSharedCommand", "setParameters");
43 //**********************************************************************************************************************
44 string RareFactSharedCommand::getHelpString(){
46 string helpString = "";
47 ValidCalculators validCalculator;
48 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc. shared is required if there is no current sharedfile. \n";
49 helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
50 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
51 helpString += "The rarefaction command should be in the following format: \n";
52 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
53 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
54 helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
55 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
56 helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n";
57 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
58 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
59 helpString += validCalculator.printCalc("sharedrarefaction");
60 helpString += "The label parameter is used to analyze specific labels in your input.\n";
61 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
62 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
66 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
70 //**********************************************************************************************************************
71 string RareFactSharedCommand::getOutputFileNameTag(string type, string inputName=""){
73 string outputFileName = "";
74 map<string, vector<string> >::iterator it;
76 //is this a type this command creates
77 it = outputTypes.find(type);
78 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
80 if (type == "sharedrarefaction") { outputFileName = "shared.rarefaction"; }
81 else if (type == "sharedr_nseqs") { outputFileName = "shared.r_nseqs"; }
82 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
84 return outputFileName;
87 m->errorOut(e, "RareFactSharedCommand", "getOutputFileNameTag");
91 //**********************************************************************************************************************
92 RareFactSharedCommand::RareFactSharedCommand(){
94 abort = true; calledHelp = true;
96 vector<string> tempOutNames;
97 outputTypes["sharedrarefaction"] = tempOutNames;
98 outputTypes["sharedr_nseqs"] = tempOutNames;
100 catch(exception& e) {
101 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
105 //**********************************************************************************************************************
107 RareFactSharedCommand::RareFactSharedCommand(string option) {
109 abort = false; calledHelp = false;
112 //allow user to run help
113 if(option == "help") { help(); abort = true; calledHelp = true; }
114 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117 vector<string> myArray = setParameters();
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
121 map<string,string>::iterator it;
123 ValidParameters validParameter;
125 //check to make sure all parameters are valid for command
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["sharedrarefaction"] = tempOutNames;
133 outputTypes["sharedr_nseqs"] = tempOutNames;
135 //if the user changes the input directory command factory will send this info to us in the output parameter
136 string inputDir = validParameter.validFile(parameters, "inputdir", false);
137 if (inputDir == "not found"){ inputDir = ""; }
140 it = parameters.find("shared");
141 //user has given a template file
142 if(it != parameters.end()){
143 path = m->hasPath(it->second);
144 //if the user has not given a path then, add inputdir. else leave path alone.
145 if (path == "") { parameters["shared"] = inputDir + it->second; }
148 it = parameters.find("design");
149 //user has given a template file
150 if(it != parameters.end()){
151 path = m->hasPath(it->second);
152 //if the user has not given a path then, add inputdir. else leave path alone.
153 if (path == "") { parameters["design"] = inputDir + it->second; }
159 sharedfile = validParameter.validFile(parameters, "shared", true);
160 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
161 else if (sharedfile == "not found") {
162 //if there is a current shared file, use it
163 sharedfile = m->getSharedFile();
164 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
165 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
166 }else { m->setSharedFile(sharedfile); }
168 designfile = validParameter.validFile(parameters, "design", true);
169 if (designfile == "not open") { abort = true; designfile = ""; }
170 else if (designfile == "not found") { designfile = ""; }
171 else { m->setDesignFile(designfile); }
174 //if the user changes the output directory command factory will send this info to us in the output parameter
175 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
178 //check for optional parameter and set defaults
179 // ...at some point should added some additional type checking...
180 label = validParameter.validFile(parameters, "label", false);
181 if (label == "not found") { label = ""; }
183 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
184 else { allLines = 1; }
188 calc = validParameter.validFile(parameters, "calc", false);
189 if (calc == "not found") { calc = "sharedobserved"; }
191 if (calc == "default") { calc = "sharedobserved"; }
193 m->splitAtDash(calc, Estimators);
194 if (m->inUsersGroups("citation", Estimators)) {
195 ValidCalculators validCalc; validCalc.printCitations(Estimators);
196 //remove citation from list of calcs
197 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
200 groups = validParameter.validFile(parameters, "groups", false);
201 if (groups == "not found") { groups = ""; }
203 m->splitAtDash(groups, Groups);
205 m->setGroups(Groups);
207 string sets = validParameter.validFile(parameters, "sets", false);
208 if (sets == "not found") { sets = ""; }
210 m->splitAtDash(sets, Sets);
214 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
215 m->mothurConvert(temp, freq);
217 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
218 m->mothurConvert(temp, nIters);
220 temp = validParameter.validFile(parameters, "jumble", false); if (temp == "not found") { temp = "T"; }
221 if (m->isTrue(temp)) { jumble = true; }
222 else { jumble = false; }
225 temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
226 groupMode = m->isTrue(temp);
228 temp = validParameter.validFile(parameters, "subsampleiters", false); if (temp == "not found") { temp = "1000"; }
229 m->mothurConvert(temp, iters);
231 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
232 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
234 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
235 else { subsample = false; }
238 if (subsample == false) { iters = 1; }
243 catch(exception& e) {
244 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
248 //**********************************************************************************************************************
250 int RareFactSharedCommand::execute(){
253 if (abort == true) { if (calledHelp) { return 0; } return 2; }
256 if (designfile == "") { //fake out designMap to run with process
257 process(designMap, "");
259 designMap.readDesignMap(designfile);
261 //fill Sets - checks for "all" and for any typo groups
263 vector<string> nameSets = designMap.getNamesOfGroups();
264 util.setGroups(Sets, nameSets);
266 for (int i = 0; i < Sets.size(); i++) {
267 process(designMap, Sets[i]);
270 if (groupMode) { outputNames = createGroupFile(outputNames); }
273 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
275 m->mothurOutEndLine();
276 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
277 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
278 m->mothurOutEndLine();
282 catch(exception& e) {
283 m->errorOut(e, "RareFactSharedCommand", "execute");
287 //**********************************************************************************************************************
289 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
292 vector<Display*> rDisplays;
294 InputData input(sharedfile, "sharedfile");
295 lookup = input.getSharedRAbundVectors();
296 if (lookup.size() < 2) {
297 m->mothurOut("I cannot run the command without at least 2 valid groups.");
298 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
302 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
304 vector<string> newGroups = m->getGroups();
305 if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read
306 vector<string> thisSets; thisSets.push_back(thisSet);
307 newGroups = designMap.getNamesSeqs(thisSets);
308 fileNameRoot += thisSet + ".";
311 vector<SharedRAbundVector*> subset;
312 if (thisSet == "") { subset.clear(); subset = lookup; }
313 else {//fill subset with this sets groups
315 for (int i = 0; i < lookup.size(); i++) {
316 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
317 subset.push_back(lookup[i]);
322 /******************************************************/
324 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
325 subsampleSize = subset[0]->getNumSeqs();
326 for (int i = 1; i < subset.size(); i++) {
327 int thisSize = subset[i]->getNumSeqs();
329 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
333 vector<SharedRAbundVector*> temp;
334 for (int i = 0; i < subset.size(); i++) {
335 if (subset[i]->getNumSeqs() < subsampleSize) {
336 m->mothurOut(subset[i]->getGroup() + " contains " + toString(subset[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
339 newGroups.push_back(subset[i]->getGroup());
340 temp.push_back(subset[i]);
346 if (subset.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
348 /******************************************************/
350 ValidCalculators validCalculator;
351 for (int i=0; i<Estimators.size(); i++) {
352 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
353 if (Estimators[i] == "sharedobserved") {
354 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
355 outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction")); outputTypes["sharedrarefaction"].push_back(fileNameRoot+getOutputFileNameTag("sharedrarefaction"));
356 }else if (Estimators[i] == "sharednseqs") {
357 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
358 outputNames.push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs")); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
361 file2Group[outputNames.size()-1] = thisSet;
364 //if the users entered no valid calculators don't execute command
365 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
367 if (m->control_pressed) {
368 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
369 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
370 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
375 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
376 string lastLabel = subset[0]->getLabel();
377 set<string> processedLabels;
378 set<string> userLabels = labels;
380 //as long as you are not at the end of the file or done wih the lines you want
381 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
382 if (m->control_pressed) {
383 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
384 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
385 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
389 if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
390 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
391 rCurve = new Rarefact(subset, rDisplays);
392 rCurve->getSharedCurve(freq, nIters);
395 if (subsample) { subsampleLookup(subset, fileNameRoot); }
397 processedLabels.insert(subset[0]->getLabel());
398 userLabels.erase(subset[0]->getLabel());
401 if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
402 string saveLabel = subset[0]->getLabel();
404 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
405 lookup = input.getSharedRAbundVectors(lastLabel);
407 if (thisSet == "") { subset.clear(); subset = lookup; }
408 else {//fill subset with this sets groups
410 for (int i = 0; i < lookup.size(); i++) {
411 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
412 subset.push_back(lookup[i]);
417 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
418 rCurve = new Rarefact(subset, rDisplays);
419 rCurve->getSharedCurve(freq, nIters);
422 if (subsample) { subsampleLookup(subset, fileNameRoot); }
424 processedLabels.insert(subset[0]->getLabel());
425 userLabels.erase(subset[0]->getLabel());
427 //restore real lastlabel to save below
428 subset[0]->setLabel(saveLabel);
432 lastLabel = subset[0]->getLabel();
434 //get next line to process
435 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
436 lookup = input.getSharedRAbundVectors();
438 if (lookup[0] != NULL) {
439 if (thisSet == "") { subset.clear(); subset = lookup; }
440 else {//fill subset with this sets groups
442 for (int i = 0; i < lookup.size(); i++) {
443 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
444 subset.push_back(lookup[i]);
448 }else { subset.clear(); subset.push_back(NULL); }
452 if (m->control_pressed) {
453 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
454 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
458 //output error messages about any remaining user labels
459 set<string>::iterator it;
460 bool needToRun = false;
461 for (it = userLabels.begin(); it != userLabels.end(); it++) {
462 m->mothurOut("Your file does not include the label " + *it);
463 if (processedLabels.count(lastLabel) != 1) {
464 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
467 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
471 if (m->control_pressed) {
472 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
473 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
477 //run last label if you need to
478 if (needToRun == true) {
479 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
480 lookup = input.getSharedRAbundVectors(lastLabel);
482 if (thisSet == "") { subset.clear(); subset = lookup; }
483 else {//fill subset with this sets groups
485 for (int i = 0; i < lookup.size(); i++) {
486 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
487 subset.push_back(lookup[i]);
492 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
493 rCurve = new Rarefact(subset, rDisplays);
494 rCurve->getSharedCurve(freq, nIters);
497 if (subsample) { subsampleLookup(subset, fileNameRoot); }
499 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
502 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
507 catch(exception& e) {
508 m->errorOut(e, "RareFactSharedCommand", "process");
512 //**********************************************************************************************************************
513 int RareFactSharedCommand::subsampleLookup(vector<SharedRAbundVector*>& thisLookup, string fileNameRoot) {
516 map<string, vector<string> > filenames;
517 for (int thisIter = 0; thisIter < iters; thisIter++) {
519 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
521 //we want the summary results for the whole dataset, then the subsampling
523 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
525 //make copy of lookup so we don't get access violations
526 vector<SharedRAbundVector*> newLookup;
527 for (int k = 0; k < thisItersLookup.size(); k++) {
528 SharedRAbundVector* temp = new SharedRAbundVector();
529 temp->setLabel(thisItersLookup[k]->getLabel());
530 temp->setGroup(thisItersLookup[k]->getGroup());
531 newLookup.push_back(temp);
535 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
536 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
537 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
540 tempLabels = sample.getSample(newLookup, subsampleSize);
541 thisItersLookup = newLookup;
545 vector<Display*> rDisplays;
547 string thisfileNameRoot = fileNameRoot + toString(thisIter);
549 ValidCalculators validCalculator;
550 for (int i=0; i<Estimators.size(); i++) {
551 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
552 if (Estimators[i] == "sharedobserved") {
553 rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
554 filenames["sharedrarefaction"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"));
555 }else if (Estimators[i] == "sharednseqs") {
556 rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
557 filenames["sharedr_nseqs"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
562 rCurve = new Rarefact(thisItersLookup, rDisplays);
563 rCurve->getSharedCurve(freq, nIters);
567 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
568 thisItersLookup.clear();
569 for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
572 //create std and ave outputs
573 vector< vector< vector< double > > > results; //iter -> numSampled -> data
574 for (map<string, vector<string> >::iterator it = filenames.begin(); it != filenames.end(); it++) {
575 vector<string> thisTypesFiles = it->second;
576 vector<string> columnHeaders;
577 for (int i = 0; i < thisTypesFiles.size(); i++) {
579 m->openInputFile(thisTypesFiles[i], in);
581 string headers = m->getline(in); m->gobble(in);
582 columnHeaders = m->splitWhiteSpace(headers);
583 int numCols = columnHeaders.size();
585 vector<vector<double> > thisFilesLines;
587 if (m->control_pressed) { break; }
588 vector<double> data; data.resize(numCols, 0);
589 //read numSampled line
590 for (int j = 0; j < numCols; j++) { in >> data[j]; m->gobble(in); }
591 thisFilesLines.push_back(data);
594 results.push_back(thisFilesLines);
595 m->mothurRemove(thisTypesFiles[i]);
598 if (!m->control_pressed) {
600 string outputFile = fileNameRoot + "ave-std." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag(it->first);
602 m->openOutputFile(outputFile, out);
603 outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile);
605 out << columnHeaders[0] << '\t' << "method\t";
606 for (int i = 1; i < columnHeaders.size(); i++) { out << columnHeaders[i] << '\t'; }
609 vector< vector<double> > aveResults; aveResults.resize(results[0].size());
610 for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); }
612 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
613 for (int i = 0; i < aveResults.size(); i++) { //initialize sums to zero.
614 aveResults[i][0] = results[thisIter][i][0];
615 for (int j = 1; j < aveResults[i].size(); j++) {
616 aveResults[i][j] += results[thisIter][i][j];
621 for (int i = 0; i < aveResults.size(); i++) { //finds average.
622 for (int j = 1; j < aveResults[i].size(); j++) {
623 aveResults[i][j] /= (float) iters;
628 vector< vector<double> > stdResults; stdResults.resize(results[0].size());
629 for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); }
631 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
632 for (int i = 0; i < stdResults.size(); i++) {
633 stdResults[i][0] = aveResults[i][0];
634 for (int j = 1; j < stdResults[i].size(); j++) {
635 stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j]));
640 for (int i = 0; i < stdResults.size(); i++) { //finds average.
641 out << aveResults[i][0] << '\t' << "ave\t";
642 for (int j = 1; j < aveResults[i].size(); j++) { out << aveResults[i][j] << '\t'; }
644 out << stdResults[i][0] << '\t' << "std\t";
645 for (int j = 1; j < stdResults[i].size(); j++) {
646 stdResults[i][j] /= (float) iters;
647 stdResults[i][j] = sqrt(stdResults[i][j]);
648 out << stdResults[i][j] << '\t';
659 catch(exception& e) {
660 m->errorOut(e, "RareFactSharedCommand", "subsample");
664 //**********************************************************************************************************************
665 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
668 vector<string> newFileNames;
670 //find different types of files
671 map<string, map<string, string> > typesFiles;
672 map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
673 vector<string> groupNames;
674 for (int i = 0; i < outputNames.size(); i++) {
676 string extension = m->getExtension(outputNames[i]);
677 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
678 m->mothurRemove(combineFileName); //remove old file
681 m->openInputFile(outputNames[i], in);
683 string labels = m->getline(in);
685 istringstream iss (labels,istringstream::in);
686 string newLabel = ""; vector<string> theseLabels;
687 while(!iss.eof()) { iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
688 vector< vector<string> > allLabels;
689 vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
690 for (int j = 1; j < theseLabels.size()-1; j++) {
691 if (theseLabels[j+1] == "lci") {
692 thisSet.push_back(theseLabels[j]);
693 thisSet.push_back(theseLabels[j+1]);
694 thisSet.push_back(theseLabels[j+2]);
696 }else{ //no lci or hci for this calc.
697 thisSet.push_back(theseLabels[j]);
699 allLabels.push_back(thisSet);
702 fileLabels[combineFileName] = allLabels;
704 map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
705 if (itfind != typesFiles.end()) {
706 (itfind->second)[outputNames[i]] = file2Group[i];
708 map<string, string> temp;
709 temp[outputNames[i]] = file2Group[i];
710 typesFiles[extension] = temp;
712 if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); }
715 //for each type create a combo file
717 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
720 string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
721 m->openOutputFileAppend(combineFileName, out);
722 newFileNames.push_back(combineFileName);
723 map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
724 set<int> numSampledSet;
726 //open each type summary file
727 map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
729 for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
731 string thisfilename = itFileNameGroup->first;
732 string group = itFileNameGroup->second;
735 m->openInputFile(thisfilename, temp);
737 //read through first line - labels
738 m->getline(temp); m->gobble(temp);
740 map<int, vector< vector<string> > > thisFilesLines;
743 temp >> numSampled; m->gobble(temp);
745 vector< vector<string> > theseReads;
746 vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
747 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
748 vector<string> reads;
750 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
751 temp >> next; m->gobble(temp);
752 reads.push_back(next);
754 theseReads.push_back(reads);
756 thisFilesLines[numSampled] = theseReads;
759 numSampledSet.insert(numSampled);
762 files[group] = thisFilesLines;
764 //save longest file for below
765 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
768 m->mothurRemove(thisfilename);
771 //output new labels line
772 out << fileLabels[combineFileName][0][0] << '\t';
773 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
774 for (int n = 0; n < groupNames.size(); n++) { // for each group
775 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
776 out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
783 for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
785 out << (*itNumSampled) << '\t';
787 if (m->control_pressed) { break; }
789 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
790 //grab data for each group
791 for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
793 string group = itFileNameGroup->first;
795 map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
796 if (itLine != files[group].end()) {
797 for (int l = 0; l < (itLine->second)[k].size(); l++) {
798 out << (itLine->second)[k][l] << '\t';
802 for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) {
813 //return combine file name
817 catch(exception& e) {
818 m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
822 //**********************************************************************************************************************