]> git.donarmstrong.com Git - mothur.git/blob - rarefactsharedcommand.cpp
working on pam
[mothur.git] / rarefactsharedcommand.cpp
1 /*
2  *  rarefactsharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
14 #include "subsample.h"
15
16 //**********************************************************************************************************************
17 vector<string> RareFactSharedCommand::setParameters(){  
18         try {
19                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared);
20         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pdesign);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
23                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "","",true,false,true); parameters.push_back(pcalc);
25         CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(psubsampleiters);
26         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
27                 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pjumble);
28                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
29         CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
30                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
31         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "RareFactSharedCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string RareFactSharedCommand::getHelpString(){  
45         try {
46                 string helpString = "";
47                 ValidCalculators validCalculator;
48                 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode and calc.  shared is required if there is no current sharedfile. \n";
49         helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
50         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
51                 helpString += "The rarefaction command should be in the following format: \n";
52                 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
53                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
54                 helpString += "Example rarefaction.shared(label=unique-0.01-0.03,  iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
55                 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
56         helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n";
57         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
58                 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
59                 helpString += validCalculator.printCalc("sharedrarefaction");
60                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
61                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
62                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
63                 return helpString;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
67                 exit(1);
68         }
69 }
70 //**********************************************************************************************************************
71 string RareFactSharedCommand::getOutputPattern(string type) {
72     try {
73         string pattern = "";
74         
75         if (type == "sharedrarefaction") {  pattern = "[filename],shared.rarefaction"; }
76         else if (type == "sharedr_nseqs") {  pattern = "[filename],shared.r_nseqs"; }
77         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
78         
79         return pattern;
80     }
81     catch(exception& e) {
82         m->errorOut(e, "RareFactSharedCommand", "getOutputPattern");
83         exit(1);
84     }
85 }
86 //**********************************************************************************************************************
87 RareFactSharedCommand::RareFactSharedCommand(){ 
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["sharedrarefaction"] = tempOutNames;
93                 outputTypes["sharedr_nseqs"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101
102 RareFactSharedCommand::RareFactSharedCommand(string option)  {
103         try {
104                 abort = false; calledHelp = false;   
105                 allLines = 1;
106                                 
107                 //allow user to run help
108                 if(option == "help") {  help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         map<string,string>::iterator it;
117                         
118                         ValidParameters validParameter;
119                         
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["sharedrarefaction"] = tempOutNames;
128                         outputTypes["sharedr_nseqs"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("shared");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
141                                 }
142                 
143                 it = parameters.find("design");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
149                                 }
150
151                         }
152                         
153                         //get shared file
154                         sharedfile = validParameter.validFile(parameters, "shared", true);
155                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
156                         else if (sharedfile == "not found") { 
157                                 //if there is a current shared file, use it
158                                 sharedfile = m->getSharedFile(); 
159                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
160                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
161                         }else { m->setSharedFile(sharedfile); }
162             
163             designfile = validParameter.validFile(parameters, "design", true);
164                         if (designfile == "not open") { abort = true; designfile = ""; }
165                         else if (designfile == "not found") {   designfile = "";        }
166                         else { m->setDesignFile(designfile); }
167                         
168                         
169                         //if the user changes the output directory command factory will send this info to us in the output parameter 
170                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
171                         
172                         
173                         //check for optional parameter and set defaults
174                         // ...at some point should added some additional type checking...
175                         label = validParameter.validFile(parameters, "label", false);                   
176                         if (label == "not found") { label = ""; }
177                         else { 
178                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
179                                 else { allLines = 1;  }
180                         }
181                         
182                                 
183                         calc = validParameter.validFile(parameters, "calc", false);                     
184                         if (calc == "not found") { calc = "sharedobserved";  }
185                         else { 
186                                  if (calc == "default")  {  calc = "sharedobserved";  }
187                         }
188                         m->splitAtDash(calc, Estimators);
189                         if (m->inUsersGroups("citation", Estimators)) { 
190                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
191                                 //remove citation from list of calcs
192                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
193                         }
194                         
195                         groups = validParameter.validFile(parameters, "groups", false);                 
196                         if (groups == "not found") { groups = ""; }
197                         else { 
198                                 m->splitAtDash(groups, Groups);
199                         }
200                         m->setGroups(Groups);
201             
202             string sets = validParameter.validFile(parameters, "sets", false);                  
203                         if (sets == "not found") { sets = ""; }
204                         else { 
205                                 m->splitAtDash(sets, Sets);
206                         }
207                         
208                         string temp;
209                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
210                         m->mothurConvert(temp, freq); 
211                         
212                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
213                         m->mothurConvert(temp, nIters); 
214                         
215                         temp = validParameter.validFile(parameters, "jumble", false);                   if (temp == "not found") { temp = "T"; }
216                         if (m->isTrue(temp)) { jumble = true; }
217                         else { jumble = false; }
218                         m->jumble = jumble;
219             
220             temp = validParameter.validFile(parameters, "groupmode", false);            if (temp == "not found") { temp = "T"; }
221                         groupMode = m->isTrue(temp);
222             
223             temp = validParameter.validFile(parameters, "subsampleiters", false);                       if (temp == "not found") { temp = "1000"; }
224                         m->mothurConvert(temp, iters); 
225             
226             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
227                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
228             else {  
229                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
230                 else { subsample = false; }
231             }
232             
233             if (subsample == false) { iters = 1; }
234                                 
235                 }
236
237         }
238         catch(exception& e) {
239                 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
240                 exit(1);
241         }
242 }
243 //**********************************************************************************************************************
244
245 int RareFactSharedCommand::execute(){
246         try {
247         
248                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
249         
250         GroupMap designMap;
251         if (designfile == "") { //fake out designMap to run with process
252             process(designMap, "");
253         }else {
254             designMap.readDesignMap(designfile);
255             
256             //fill Sets - checks for "all" and for any typo groups
257                         SharedUtil util;
258                         vector<string> nameSets = designMap.getNamesOfGroups();
259                         util.setGroups(Sets, nameSets);
260             
261             for (int i = 0; i < Sets.size(); i++) {
262                 process(designMap, Sets[i]);
263             }
264             
265             if (groupMode) { outputNames = createGroupFile(outputNames); }
266         }
267                     
268                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
269                 
270                 m->mothurOutEndLine();
271                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
272                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
273                 m->mothurOutEndLine();
274
275                 return 0;
276         }
277         catch(exception& e) {
278                 m->errorOut(e, "RareFactSharedCommand", "execute");
279                 exit(1);
280         }
281 }
282 //**********************************************************************************************************************
283
284 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
285         try {
286         Rarefact* rCurve;
287         vector<Display*> rDisplays;
288         
289         InputData input(sharedfile, "sharedfile");
290                 lookup = input.getSharedRAbundVectors();
291         if (lookup.size() < 2) { 
292                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
293             for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
294             return 0;
295         }
296         
297         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
298         
299         vector<string> newGroups = m->getGroups();
300         if (thisSet != "") {  //make groups only filled with groups from this set so that's all inputdata will read
301             vector<string> thisSets; thisSets.push_back(thisSet);
302             newGroups = designMap.getNamesSeqs(thisSets);
303             fileNameRoot += thisSet + ".";
304         }
305         
306         vector<SharedRAbundVector*> subset;
307         if (thisSet == "") {  subset.clear(); subset = lookup;  }
308         else {//fill subset with this sets groups
309             subset.clear();
310             for (int i = 0; i < lookup.size(); i++) {
311                 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
312                     subset.push_back(lookup[i]);
313                 }
314             }
315         }
316
317         /******************************************************/
318         if (subsample) { 
319             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
320                 subsampleSize = subset[0]->getNumSeqs();
321                 for (int i = 1; i < subset.size(); i++) {
322                     int thisSize = subset[i]->getNumSeqs();
323                     
324                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
325                 }
326             }else {
327                 newGroups.clear();
328                 vector<SharedRAbundVector*> temp;
329                 for (int i = 0; i < subset.size(); i++) {
330                     if (subset[i]->getNumSeqs() < subsampleSize) { 
331                         m->mothurOut(subset[i]->getGroup() + " contains " + toString(subset[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
332                         delete subset[i];
333                     }else { 
334                         newGroups.push_back(subset[i]->getGroup()); 
335                         temp.push_back(subset[i]);
336                     }
337                 } 
338                 subset = temp;
339             }
340             
341             if (subset.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
342         }
343         /******************************************************/
344         
345         map<string, string> variables; 
346         variables["[filename]"] = fileNameRoot;
347                 ValidCalculators validCalculator;
348                 for (int i=0; i<Estimators.size(); i++) {
349                         if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
350                                 if (Estimators[i] == "sharedobserved") { 
351                                         rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(getOutputFileName("sharedrarefaction",variables), "")));
352                                         outputNames.push_back(getOutputFileName("sharedrarefaction",variables)); outputTypes["sharedrarefaction"].push_back(getOutputFileName("sharedrarefaction",variables));
353                                 }else if (Estimators[i] == "sharednseqs") { 
354                                         rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(getOutputFileName("sharedr_nseqs",variables), "")));
355                                         outputNames.push_back(getOutputFileName("sharedr_nseqs",variables)); outputTypes["sharedr_nseqs"].push_back(getOutputFileName("sharedr_nseqs",variables));
356                                 }
357                         }
358             file2Group[outputNames.size()-1] = thisSet;
359                 }
360                 
361                 //if the users entered no valid calculators don't execute command
362                 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
363                 
364                 if (m->control_pressed) { 
365                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
366                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
367                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
368                         return 0;
369                 }
370         
371         
372                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
373         string lastLabel = subset[0]->getLabel();
374                 set<string> processedLabels;
375                 set<string> userLabels = labels;
376         
377                 //as long as you are not at the end of the file or done wih the lines you want
378                 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
379                         if (m->control_pressed) { 
380                                 for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
381                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
382                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
383                                 return 0;
384                         }
385                         
386                         if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
387                                 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
388                                 rCurve = new Rarefact(subset, rDisplays);
389                                 rCurve->getSharedCurve(freq, nIters);
390                                 delete rCurve;
391                 
392                 if (subsample) { subsampleLookup(subset, fileNameRoot);  }
393                     
394                                 processedLabels.insert(subset[0]->getLabel());
395                                 userLabels.erase(subset[0]->getLabel());
396                         }
397                         
398                         if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
399                 string saveLabel = subset[0]->getLabel();
400                 
401                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
402                 lookup = input.getSharedRAbundVectors(lastLabel);
403                 
404                 if (thisSet == "") {  subset.clear(); subset = lookup;  }
405                 else {//fill subset with this sets groups
406                     subset.clear();
407                     for (int i = 0; i < lookup.size(); i++) {
408                         if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
409                             subset.push_back(lookup[i]);
410                         }
411                     }
412                 }
413
414                 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
415                 rCurve = new Rarefact(subset, rDisplays);
416                 rCurve->getSharedCurve(freq, nIters);
417                 delete rCurve;
418                 
419                 if (subsample) { subsampleLookup(subset, fileNameRoot);  }
420                 
421                 processedLabels.insert(subset[0]->getLabel());
422                 userLabels.erase(subset[0]->getLabel());
423                 
424                 //restore real lastlabel to save below
425                 subset[0]->setLabel(saveLabel);
426                         }
427             
428                         
429                         lastLabel = subset[0]->getLabel();
430                         
431                         //get next line to process
432                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
433                         lookup = input.getSharedRAbundVectors();
434             
435             if (lookup[0] != NULL) {
436                 if (thisSet == "") {  subset.clear(); subset = lookup;  }
437                 else {//fill subset with this sets groups
438                     subset.clear();
439                     for (int i = 0; i < lookup.size(); i++) {
440                         if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
441                             subset.push_back(lookup[i]);
442                         }
443                     }
444                 }
445             }else {  subset.clear(); subset.push_back(NULL); }
446
447                 }
448                 
449                 if (m->control_pressed) { 
450                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
451                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
452                         return 0;
453                 }
454                 
455                 //output error messages about any remaining user labels
456                 set<string>::iterator it;
457                 bool needToRun = false;
458                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
459                         m->mothurOut("Your file does not include the label " + *it); 
460                         if (processedLabels.count(lastLabel) != 1) {
461                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
462                                 needToRun = true;
463                         }else {
464                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
465                         }
466                 }
467                 
468                 if (m->control_pressed) { 
469                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
470                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
471                         return 0;
472                 }
473                 
474                 //run last label if you need to
475                 if (needToRun == true)  {
476                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i]; }  } 
477                         lookup = input.getSharedRAbundVectors(lastLabel);
478             
479             if (thisSet == "") {  subset.clear(); subset = lookup;  }
480             else {//fill subset with this sets groups
481                 subset.clear();
482                 for (int i = 0; i < lookup.size(); i++) {
483                     if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
484                         subset.push_back(lookup[i]);
485                     }
486                 }
487             }
488             
489                         m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
490                         rCurve = new Rarefact(subset, rDisplays);
491                         rCurve->getSharedCurve(freq, nIters);
492                         delete rCurve;
493             
494             if (subsample) { subsampleLookup(subset, fileNameRoot);  }
495                 
496                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
497                 }
498                 
499                 for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
500
501         
502         return 0;
503     }
504         catch(exception& e) {
505                 m->errorOut(e, "RareFactSharedCommand", "process");
506                 exit(1);
507         }
508 }
509 //**********************************************************************************************************************
510 int RareFactSharedCommand::subsampleLookup(vector<SharedRAbundVector*>& thisLookup, string fileNameRoot) {
511         try {
512         
513         map<string, vector<string> > filenames;
514         for (int thisIter = 0; thisIter < iters; thisIter++) {
515             
516             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
517             
518              //we want the summary results for the whole dataset, then the subsampling
519             SubSample sample;
520             vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
521                 
522             //make copy of lookup so we don't get access violations
523             vector<SharedRAbundVector*> newLookup;
524             for (int k = 0; k < thisItersLookup.size(); k++) {
525                 SharedRAbundVector* temp = new SharedRAbundVector();
526                 temp->setLabel(thisItersLookup[k]->getLabel());
527                 temp->setGroup(thisItersLookup[k]->getGroup());
528                 newLookup.push_back(temp);
529             }
530                 
531             //for each bin
532             for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
533                 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
534                 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
535                 }
536                 
537             tempLabels = sample.getSample(newLookup, subsampleSize);
538             thisItersLookup = newLookup;
539             
540             
541             Rarefact* rCurve;
542             vector<Display*> rDisplays;
543             
544             string thisfileNameRoot = fileNameRoot + toString(thisIter);
545             
546             map<string, string> variables; 
547             variables["[filename]"] = thisfileNameRoot;
548             
549             ValidCalculators validCalculator;
550             for (int i=0; i<Estimators.size(); i++) {
551                 if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
552                     if (Estimators[i] == "sharedobserved") { 
553                         rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(getOutputFileName("sharedrarefaction",variables), "")));
554                         filenames["sharedrarefaction"].push_back(getOutputFileName("sharedrarefaction",variables));
555                     }else if (Estimators[i] == "sharednseqs") { 
556                         rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(getOutputFileName("sharedr_nseqs",variables), "")));
557                         filenames["sharedr_nseqs"].push_back(getOutputFileName("sharedr_nseqs",variables));
558                     }
559                 }
560             }
561             
562             rCurve = new Rarefact(thisItersLookup, rDisplays);
563                         rCurve->getSharedCurve(freq, nIters);
564                         delete rCurve;
565             
566             //clean up memory
567             for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
568             thisItersLookup.clear();
569             for(int i=0;i<rDisplays.size();i++){        delete rDisplays[i];    }
570         }
571         
572         //create std and ave outputs
573         vector< vector< vector< double > > > results; //iter -> numSampled -> data
574         for (map<string, vector<string> >::iterator it = filenames.begin(); it != filenames.end(); it++) {
575             vector<string> thisTypesFiles = it->second;
576             vector<string> columnHeaders;
577             for (int i = 0; i < thisTypesFiles.size(); i++) {
578                 ifstream in;
579                 m->openInputFile(thisTypesFiles[i], in);
580                 
581                 string headers = m->getline(in); m->gobble(in);
582                 columnHeaders = m->splitWhiteSpace(headers);
583                 int numCols = columnHeaders.size();
584                 
585                 vector<vector<double> > thisFilesLines;
586                 while (!in.eof()) {
587                     if (m->control_pressed) { break; }
588                     vector<double> data; data.resize(numCols, 0);
589                     //read numSampled line
590                     for (int j = 0; j < numCols; j++) { in >> data[j]; m->gobble(in); }
591                     thisFilesLines.push_back(data);
592                 }
593                 in.close();
594                 results.push_back(thisFilesLines);
595                 m->mothurRemove(thisTypesFiles[i]);
596             }
597             
598             if (!m->control_pressed) {
599                 //process results
600                 map<string, string> variables; variables["[filename]"] = fileNameRoot + "ave-std." + thisLookup[0]->getLabel() + ".";
601
602                 string outputFile = getOutputFileName(it->first,variables);
603                 ofstream out;
604                 m->openOutputFile(outputFile, out);
605                 outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile);
606                 
607                 out << columnHeaders[0] << '\t' << "method\t";
608                 for (int i = 1; i < columnHeaders.size(); i++) { out << columnHeaders[i] << '\t'; }
609                 out << endl;
610             
611                 vector< vector<double> > aveResults; aveResults.resize(results[0].size());
612                 for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); }
613                 
614                 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
615                     for (int i = 0; i < aveResults.size(); i++) {  //initialize sums to zero.
616                         aveResults[i][0] = results[thisIter][i][0];
617                         for (int j = 1; j < aveResults[i].size(); j++) {
618                             aveResults[i][j] += results[thisIter][i][j];
619                         }
620                     }
621                 }
622                 
623                 for (int i = 0; i < aveResults.size(); i++) {  //finds average.
624                     for (int j = 1; j < aveResults[i].size(); j++) {
625                         aveResults[i][j] /= (float) iters;
626                     }
627                 }
628                 
629                 //standard deviation
630                 vector< vector<double> > stdResults; stdResults.resize(results[0].size());
631                 for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); }
632                 
633                 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
634                     for (int i = 0; i < stdResults.size(); i++) {  
635                         stdResults[i][0] = aveResults[i][0];
636                         for (int j = 1; j < stdResults[i].size(); j++) {
637                             stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j]));
638                         }
639                     }
640                 }
641                 
642                 for (int i = 0; i < stdResults.size(); i++) {  //finds average.
643                     out << aveResults[i][0] << '\t' << "ave\t";
644                     for (int j = 1; j < aveResults[i].size(); j++) { out << aveResults[i][j] << '\t'; }
645                     out << endl;
646                     out << stdResults[i][0] << '\t' << "std\t";
647                     for (int j = 1; j < stdResults[i].size(); j++) {
648                         stdResults[i][j] /= (float) iters;
649                         stdResults[i][j] = sqrt(stdResults[i][j]);
650                         out << stdResults[i][j] << '\t';
651                     }
652                     out << endl;
653                 }
654                 out.close();
655             }
656         }
657         
658         
659         return 0;
660     }
661         catch(exception& e) {
662                 m->errorOut(e, "RareFactSharedCommand", "subsample");
663                 exit(1);
664         }
665 }
666 //**********************************************************************************************************************
667 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
668         try {
669                 
670                 vector<string> newFileNames;
671                 
672                 //find different types of files
673                 map<string, map<string, string> > typesFiles;
674         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
675         vector<string> groupNames;
676                 for (int i = 0; i < outputNames.size(); i++) {
677             
678                         string extension = m->getExtension(outputNames[i]);
679             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
680                         m->mothurRemove(combineFileName); //remove old file
681             
682                         ifstream in;
683                         m->openInputFile(outputNames[i], in);
684                         
685                         string labels = m->getline(in);
686             
687                         istringstream iss (labels,istringstream::in);
688             string newLabel = ""; vector<string> theseLabels;
689             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
690             vector< vector<string> > allLabels;
691             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
692             for (int j = 1; j < theseLabels.size()-1; j++) {
693                 if (theseLabels[j+1] == "lci") {
694                     thisSet.push_back(theseLabels[j]); 
695                     thisSet.push_back(theseLabels[j+1]); 
696                     thisSet.push_back(theseLabels[j+2]);
697                     j++; j++;
698                 }else{ //no lci or hci for this calc.
699                     thisSet.push_back(theseLabels[j]); 
700                 }
701                 allLabels.push_back(thisSet); 
702                 thisSet.clear();
703             }
704             fileLabels[combineFileName] = allLabels;
705             
706             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
707             if (itfind != typesFiles.end()) {
708                 (itfind->second)[outputNames[i]] = file2Group[i];
709             }else {
710                 map<string, string> temp;  
711                 temp[outputNames[i]] = file2Group[i];
712                 typesFiles[extension] = temp;
713             }
714             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
715                 }
716                 
717                 //for each type create a combo file
718                 
719                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
720                         
721                         ofstream out;
722                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
723                         m->openOutputFileAppend(combineFileName, out);
724                         newFileNames.push_back(combineFileName);
725                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
726             set<int> numSampledSet;
727             
728                         //open each type summary file
729                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
730                         int maxLines = 0;
731                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
732                 
733                 string thisfilename = itFileNameGroup->first;
734                 string group = itFileNameGroup->second;
735                 
736                                 ifstream temp;
737                                 m->openInputFile(thisfilename, temp);
738                                 
739                                 //read through first line - labels
740                                 m->getline(temp);       m->gobble(temp);
741                                 
742                                 map<int, vector< vector<string> > > thisFilesLines;
743                                 while (!temp.eof()){
744                     int numSampled = 0;
745                     temp >> numSampled; m->gobble(temp);
746                     
747                     vector< vector<string> > theseReads;
748                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
749                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
750                         vector<string> reads;
751                         string next = "";
752                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
753                             temp >> next; m->gobble(temp);
754                             reads.push_back(next);
755                         }
756                         theseReads.push_back(reads);
757                     }
758                     thisFilesLines[numSampled] = theseReads;
759                     m->gobble(temp);
760                     
761                     numSampledSet.insert(numSampled);
762                                 }
763                                 
764                                 files[group] = thisFilesLines;
765                                 
766                                 //save longest file for below
767                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
768                                 
769                                 temp.close();
770                                 m->mothurRemove(thisfilename);
771                         }
772                         
773             //output new labels line
774             out << fileLabels[combineFileName][0][0] << '\t';
775             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
776                 for (int n = 0; n < groupNames.size(); n++) { // for each group
777                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
778                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
779                     }
780                 }
781             }
782                         out << endl;
783             
784                         //for each label
785                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
786                                 
787                 out << (*itNumSampled) << '\t';
788                 
789                 if (m->control_pressed) { break; }
790                 
791                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
792                                     //grab data for each group
793                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
794                         
795                         string group = itFileNameGroup->first;
796                         
797                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
798                         if (itLine != files[group].end()) { 
799                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
800                                 out << (itLine->second)[k][l] << '\t';
801                                 
802                             }                             
803                         }else { 
804                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
805                                 out << "NA" << '\t';
806                             } 
807                         }
808                     }
809                 }
810                 out << endl;
811                         }       
812                         out.close();
813                 }
814                 
815                 //return combine file name
816                 return newFileNames;
817                 
818         }
819         catch(exception& e) {
820                 m->errorOut(e, "RareFactSharedCommand", "createGroupFile");
821                 exit(1);
822         }
823 }
824 //**********************************************************************************************************************