]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
added shannonrange calc.
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26 #include "shannonrange.h"
27
28
29 //**********************************************************************************************************************
30 vector<string> RareFactCommand::setParameters(){        
31         try {
32                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(plist);
33                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false); parameters.push_back(prabund);
34                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false); parameters.push_back(psabund);
35                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared);
36                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
37                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
38                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
39                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-shannonrange", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc);
40                 CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
41         CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha);
42                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
43                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
44                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
45                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
46                 
47                 vector<string> myArray;
48                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
49                 return myArray;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "RareFactCommand", "setParameters");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string RareFactCommand::getHelpString(){        
58         try {
59                 ValidCalculators validCalculator;
60                 string helpString = "";
61                 helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, processors, groupmode and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
62                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
63                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
64                 helpString += "The rarefaction.single command should be in the following format: \n";
65                 helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
66                 helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
67                 helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
68         helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n";
69                 validCalculator.printCalc("rarefaction");
70                 helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
71                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
72                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
73                 return helpString;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "RareFactCommand", "getHelpString");
77                 exit(1);
78         }
79 }
80 //**********************************************************************************************************************
81 string RareFactCommand::getOutputPattern(string type) {
82     try {
83         string pattern = "";
84         if (type == "rarefaction") {  pattern =  "[filename],rarefaction"; }
85         else if (type == "r_chao") {  pattern =  "[filename],r_chao"; }
86         else if (type == "r_ace") {  pattern =  "[filename],r_ace"; }
87         else if (type == "r_jack") {  pattern =  "[filename],r_jack"; }
88         else if (type == "r_shannon") {  pattern =  "[filename],r_shannon"; }
89         else if (type == "r_shannoneven") {  pattern =  "[filename],r_shannoneven"; }
90         else if (type == "r_smithwilson") {  pattern =  "[filename],r_smithwilson"; }
91         else if (type == "r_npshannon") {  pattern =  "[filename],r_npshannon"; }
92         else if (type == "r_shannonrange"){  pattern =  "[filename],r_shannonrange";    }
93         else if (type == "r_simpson") {  pattern =  "[filename],r_simpson"; }
94         else if (type == "r_simpsoneven") {  pattern =  "[filename],r_simpsoneven"; }
95         else if (type == "r_invsimpson") {  pattern =  "[filename],r_invsimpson"; }
96         else if (type == "r_bootstrap") {  pattern =  "[filename],r_bootstrap"; }
97         else if (type == "r_coverage") {  pattern =  "[filename],r_coverage"; }
98         else if (type == "r_nseqs") {  pattern =  "[filename],r_nseqs"; }
99         else if (type == "r_heip") {  pattern =  "[filename],r_heip"; }
100         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
101         
102         return pattern;
103     }
104     catch(exception& e) {
105         m->errorOut(e, "RareFactCommand", "getOutputPattern");
106         exit(1);
107     }
108 }
109 //**********************************************************************************************************************
110 RareFactCommand::RareFactCommand(){     
111         try {
112                 abort = true; calledHelp = true; 
113                 setParameters();
114                 vector<string> tempOutNames;
115                 outputTypes["rarefaction"] = tempOutNames;
116                 outputTypes["r_chao"] = tempOutNames;
117                 outputTypes["r_ace"] = tempOutNames;
118                 outputTypes["r_jack"] = tempOutNames;
119                 outputTypes["r_shannon"] = tempOutNames;
120                 outputTypes["r_shannoneven"] = tempOutNames;
121         outputTypes["r_shannonrange"] = tempOutNames;
122                 outputTypes["r_heip"] = tempOutNames;
123                 outputTypes["r_smithwilson"] = tempOutNames;
124                 outputTypes["r_npshannon"] = tempOutNames;
125                 outputTypes["r_simpson"] = tempOutNames;
126                 outputTypes["r_simpsoneven"] = tempOutNames;
127                 outputTypes["r_invsimpson"] = tempOutNames;
128                 outputTypes["r_bootstrap"] = tempOutNames;
129                 outputTypes["r_coverage"] = tempOutNames;
130                 outputTypes["r_nseqs"] = tempOutNames;
131         }
132         catch(exception& e) {
133                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
134                 exit(1);
135         }
136 }
137 //**********************************************************************************************************************
138 RareFactCommand::RareFactCommand(string option)  {
139         try {
140                 abort = false; calledHelp = false;   
141                 allLines = 1;
142                                                 
143                 //allow user to run help
144                 if(option == "help") { help(); abort = true; calledHelp = true; }
145                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
146                 
147                 else {
148                         vector<string> myArray = setParameters();
149                         
150                         OptionParser parser(option);
151                         map<string,string> parameters = parser.getParameters();
152                         map<string,string>::iterator it;
153                         
154                         ValidParameters validParameter;
155                 
156                         //check to make sure all parameters are valid for command
157                         for (it = parameters.begin(); it != parameters.end(); it++) { 
158                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
159                         }
160                         
161                         //initialize outputTypes
162                         vector<string> tempOutNames;
163                         outputTypes["rarefaction"] = tempOutNames;
164                         outputTypes["r_chao"] = tempOutNames;
165                         outputTypes["r_ace"] = tempOutNames;
166                         outputTypes["r_jack"] = tempOutNames;
167                         outputTypes["r_shannon"] = tempOutNames;
168                         outputTypes["r_shannoneven"] = tempOutNames;
169             outputTypes["r_shannonrange"] = tempOutNames;
170                         outputTypes["r_heip"] = tempOutNames;
171                         outputTypes["r_smithwilson"] = tempOutNames;
172                         outputTypes["r_npshannon"] = tempOutNames;
173                         outputTypes["r_simpson"] = tempOutNames;
174                         outputTypes["r_simpsoneven"] = tempOutNames;
175                         outputTypes["r_invsimpson"] = tempOutNames;
176                         outputTypes["r_bootstrap"] = tempOutNames;
177                         outputTypes["r_coverage"] = tempOutNames;
178                         outputTypes["r_nseqs"] = tempOutNames;
179                         
180                         //if the user changes the input directory command factory will send this info to us in the output parameter 
181                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
182                         if (inputDir == "not found"){   inputDir = "";          }
183                         else {
184                                 string path;
185                                 it = parameters.find("shared");
186                                 //user has given a template file
187                                 if(it != parameters.end()){ 
188                                         path = m->hasPath(it->second);
189                                         //if the user has not given a path then, add inputdir. else leave path alone.
190                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
191                                 }
192                                 
193                                 it = parameters.find("rabund");
194                                 //user has given a template file
195                                 if(it != parameters.end()){ 
196                                         path = m->hasPath(it->second);
197                                         //if the user has not given a path then, add inputdir. else leave path alone.
198                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
199                                 }
200                                 
201                                 it = parameters.find("sabund");
202                                 //user has given a template file
203                                 if(it != parameters.end()){ 
204                                         path = m->hasPath(it->second);
205                                         //if the user has not given a path then, add inputdir. else leave path alone.
206                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
207                                 }
208                                 
209                                 it = parameters.find("list");
210                                 //user has given a template file
211                                 if(it != parameters.end()){ 
212                                         path = m->hasPath(it->second);
213                                         //if the user has not given a path then, add inputdir. else leave path alone.
214                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
215                                 }
216                         }
217                         
218                         //check for required parameters
219                         listfile = validParameter.validFile(parameters, "list", true);
220                         if (listfile == "not open") { listfile = ""; abort = true; }
221                         else if (listfile == "not found") { listfile = ""; }
222                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
223                         
224                         sabundfile = validParameter.validFile(parameters, "sabund", true);
225                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
226                         else if (sabundfile == "not found") { sabundfile = ""; }
227                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
228                         
229                         rabundfile = validParameter.validFile(parameters, "rabund", true);
230                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
231                         else if (rabundfile == "not found") { rabundfile = ""; }
232                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
233                         
234                         sharedfile = validParameter.validFile(parameters, "shared", true);
235                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
236                         else if (sharedfile == "not found") { sharedfile = ""; }
237                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
238                                 
239                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
240                                 //is there are current file available for any of these?
241                                 //give priority to shared, then list, then rabund, then sabund
242                                 //if there is a current shared file, use it
243                                 sharedfile = m->getSharedFile(); 
244                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
245                                 else { 
246                                         listfile = m->getListFile(); 
247                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
248                                         else { 
249                                                 rabundfile = m->getRabundFile(); 
250                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
251                                                 else { 
252                                                         sabundfile = m->getSabundFile(); 
253                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
254                                                         else { 
255                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
256                                                                 abort = true;
257                                                         }
258                                                 }
259                                         }
260                                 }
261                         }
262                         
263                         //if the user changes the output directory command factory will send this info to us in the output parameter 
264                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
265
266                         //check for optional parameter and set defaults
267                         // ...at some point should added some additional type checking...
268                         label = validParameter.validFile(parameters, "label", false);                   
269                         if (label == "not found") { label = ""; }
270                         else { 
271                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
272                                 else { allLines = 1;  }
273                         }
274                                 
275                         calc = validParameter.validFile(parameters, "calc", false);                     
276                         if (calc == "not found") { calc = "sobs";  }
277                         else { 
278                                  if (calc == "default")  {  calc = "sobs";  }
279                         }
280                         m->splitAtDash(calc, Estimators);
281                         if (m->inUsersGroups("citation", Estimators)) { 
282                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
283                                 //remove citation from list of calcs
284                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
285                         }
286
287                         string temp;
288                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
289                         m->mothurConvert(temp, freq); 
290                         
291                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
292                         m->mothurConvert(temp, abund); 
293                         
294                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
295                         m->mothurConvert(temp, nIters); 
296                         
297                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
298                         m->setProcessors(temp);
299                         m->mothurConvert(temp, processors);
300             
301             temp = validParameter.validFile(parameters, "alpha", false);                if (temp == "not found") { temp = "1"; }
302                         m->mothurConvert(temp, alpha);
303             
304             if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; }
305                         
306                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
307                         groupMode = m->isTrue(temp);
308                 }
309                 
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
313                 exit(1);
314         }
315 }
316 //**********************************************************************************************************************
317
318 int RareFactCommand::execute(){
319         try {
320         
321                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
322                 
323         map<string, set<int> > labelToEnds;
324                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
325                 else {  inputFileNames = parseSharedFile(sharedfile, labelToEnds);  format = "rabund"; }
326         
327         if (m->control_pressed) { return 0; }
328                 
329                 map<int, string> file2Group; //index in outputNames[i] -> group
330                 for (int p = 0; p < inputFileNames.size(); p++) {
331                         
332                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
333                                                 
334                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }  m->clearGroups();  return 0; }
335                         
336                         if (inputFileNames.size() > 1) {
337                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
338                         }
339                         int i;
340                         ValidCalculators validCalculator;
341                         
342             map<string, string> variables; 
343             variables["[filename]"] = fileNameRoot;
344                           
345                         for (i=0; i<Estimators.size(); i++) {
346                                 if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { 
347                                         if (Estimators[i] == "sobs") { 
348                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(getOutputFileName("rarefaction",variables))));
349                                                 outputNames.push_back(getOutputFileName("rarefaction",variables)); outputTypes["rarefaction"].push_back(getOutputFileName("rarefaction",variables));
350                                         }else if (Estimators[i] == "chao") { 
351                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(getOutputFileName("r_chao",variables))));
352                                                 outputNames.push_back(getOutputFileName("r_chao",variables)); outputTypes["r_chao"].push_back(getOutputFileName("r_chao",variables));
353                                         }else if (Estimators[i] == "ace") { 
354                                                 if(abund < 5)
355                                                         abund = 10;
356                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(getOutputFileName("r_ace",variables))));
357                                                 outputNames.push_back(getOutputFileName("r_ace",variables)); outputTypes["r_ace"].push_back(getOutputFileName("r_ace",variables));
358                                         }else if (Estimators[i] == "jack") { 
359                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(getOutputFileName("r_jack",variables))));
360                                                 outputNames.push_back(getOutputFileName("r_jack",variables)); outputTypes["r_jack"].push_back(getOutputFileName("r_jack",variables));
361                                         }else if (Estimators[i] == "shannon") { 
362                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(getOutputFileName("r_shannon",variables))));
363                                                 outputNames.push_back(getOutputFileName("r_shannon",variables)); outputTypes["r_shannon"].push_back(getOutputFileName("r_shannon",variables));
364                                         }else if (Estimators[i] == "shannoneven") { 
365                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(getOutputFileName("r_shannoneven",variables))));
366                                                 outputNames.push_back(getOutputFileName("r_shannoneven",variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannoneven",variables));
367                                         }else if (Estimators[i] == "heip") { 
368                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(getOutputFileName("r_heip",variables))));
369                                                 outputNames.push_back(getOutputFileName("r_heip",variables)); outputTypes["r_heip"].push_back(getOutputFileName("r_heip",variables));
370                     }else if (Estimators[i] == "r_shannonrange") {
371                         rDisplays.push_back(new RareDisplay(new RangeShannon(alpha), new ThreeColumnFile(getOutputFileName("r_shannonrange", variables))));
372                         outputNames.push_back(getOutputFileName("r_shannonrange", variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannonrange", variables));
373                                         }else if (Estimators[i] == "smithwilson") {
374                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(getOutputFileName("r_smithwilson",variables))));
375                                                 outputNames.push_back(getOutputFileName("r_smithwilson",variables)); outputTypes["r_smithwilson"].push_back(getOutputFileName("r_smithwilson",variables));
376                                         }else if (Estimators[i] == "npshannon") { 
377                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(getOutputFileName("r_npshannon",variables))));
378                                                 outputNames.push_back(getOutputFileName("r_npshannon",variables)); outputTypes["r_npshannon"].push_back(getOutputFileName("r_npshannon",variables));
379                                         }else if (Estimators[i] == "simpson") { 
380                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(getOutputFileName("r_simpson",variables))));
381                                                 outputNames.push_back(getOutputFileName("r_simpson",variables)); outputTypes["r_simpson"].push_back(getOutputFileName("r_simpson",variables));
382                                         }else if (Estimators[i] == "simpsoneven") { 
383                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(getOutputFileName("r_simpsoneven",variables))));
384                                                 outputNames.push_back(getOutputFileName("r_simpsoneven",variables)); outputTypes["r_simpsoneven"].push_back(getOutputFileName("r_simpsoneven",variables));
385                                         }else if (Estimators[i] == "invsimpson") { 
386                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(getOutputFileName("r_invsimpson",variables))));
387                                                 outputNames.push_back(getOutputFileName("r_invsimpson",variables)); outputTypes["r_invsimpson"].push_back(getOutputFileName("r_invsimpson",variables));
388                                         }else if (Estimators[i] == "bootstrap") { 
389                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(getOutputFileName("r_bootstrap",variables))));
390                                                 outputNames.push_back(getOutputFileName("r_bootstrap",variables)); outputTypes["r_bootstrap"].push_back(getOutputFileName("r_bootstrap",variables));
391                                         }else if (Estimators[i] == "coverage") { 
392                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(getOutputFileName("r_coverage",variables))));
393                                                 outputNames.push_back(getOutputFileName("r_coverage",variables)); outputTypes["r_coverage"].push_back(getOutputFileName("r_coverage",variables));
394                                         }else if (Estimators[i] == "nseqs") { 
395                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(getOutputFileName("r_nseqs",variables))));
396                                                 outputNames.push_back(getOutputFileName("r_nseqs",variables)); outputTypes["r_nseqs"].push_back(getOutputFileName("r_nseqs",variables));
397                                         }
398                     if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
399                                 }
400                         }
401                         
402                         
403                         //if the users entered no valid calculators don't execute command
404                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }  return 0; }
405                         
406                         input = new InputData(inputFileNames[p], format);                       
407                         order = input->getOrderVector();
408                         string lastLabel = order->getLabel();
409                         
410                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
411                         set<string> processedLabels;
412                         set<string> userLabels = labels;
413                         
414                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
415                         
416                         //as long as you are not at the end of the file or done wih the lines you want
417                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
418                                 
419                                 if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
420
421                                 
422                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
423                                         
424                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
425                     map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
426                     set<int> ends;
427                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
428                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
429                                         rCurve->getCurve(freq, nIters);
430                                         delete rCurve;
431                                         
432                                         processedLabels.insert(order->getLabel());
433                                         userLabels.erase(order->getLabel());
434                                 }
435                                 
436                                 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
437                                         string saveLabel = order->getLabel();
438                                         
439                                         delete order;
440                                         order = (input->getOrderVector(lastLabel));
441                                         
442                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
443                                         map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
444                     set<int> ends;
445                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
446                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
447
448                                         rCurve->getCurve(freq, nIters);
449                                         delete rCurve;
450                                         
451                                         processedLabels.insert(order->getLabel());
452                                         userLabels.erase(order->getLabel());
453                                         
454                                         //restore real lastlabel to save below
455                                         order->setLabel(saveLabel);
456                                 }
457                                 
458                                 lastLabel = order->getLabel();          
459                                 
460                                 delete order;
461                                 order = (input->getOrderVector());
462                         }
463                         
464                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
465
466                         //output error messages about any remaining user labels
467                         set<string>::iterator it;
468                         bool needToRun = false;
469                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
470                                 m->mothurOut("Your file does not include the label " + *it);
471                                 if (processedLabels.count(lastLabel) != 1) {
472                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
473                                         needToRun = true;
474                                 }else {
475                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
476                                 }
477                         }
478                         
479                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
480
481                         //run last label if you need to
482                         if (needToRun == true)  {
483                                 if (order != NULL) {    delete order;   }
484                                 order = (input->getOrderVector(lastLabel));
485                                 
486                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
487                                 map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
488                 set<int> ends;
489                 if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
490                 rCurve = new Rarefact(order, rDisplays, processors, ends);
491
492                                 rCurve->getCurve(freq, nIters);
493                                 delete rCurve;
494                                 
495                                 delete order;
496                         }
497                         
498                         
499                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
500                         rDisplays.clear();
501                         delete input;  
502                 }
503                 
504                 
505                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
506
507                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
508                 if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
509
510                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
511
512                 m->mothurOutEndLine();
513                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
514                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
515                 m->mothurOutEndLine();
516
517                 return 0;
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "RareFactCommand", "execute");
521                 exit(1);
522         }
523 }
524 //**********************************************************************************************************************
525 vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
526         try {
527                 
528                 vector<string> newFileNames;
529                 
530                 //find different types of files
531                 map<string, map<string, string> > typesFiles;
532         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
533         vector<string> groupNames;
534                 for (int i = 0; i < outputNames.size(); i++) {
535             
536                         string extension = m->getExtension(outputNames[i]);
537             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
538                         m->mothurRemove(combineFileName); //remove old file
539             
540                         ifstream in;
541                         m->openInputFile(outputNames[i], in);
542                         
543                         string labels = m->getline(in);
544             
545                         istringstream iss (labels,istringstream::in);
546             string newLabel = ""; vector<string> theseLabels;
547             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
548             vector< vector<string> > allLabels;
549             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
550             for (int j = 1; j < theseLabels.size()-1; j++) {
551                 if (theseLabels[j+1] == "lci") {
552                     thisSet.push_back(theseLabels[j]); 
553                     thisSet.push_back(theseLabels[j+1]); 
554                     thisSet.push_back(theseLabels[j+2]);
555                     j++; j++;
556                 }else{ //no lci or hci for this calc.
557                     thisSet.push_back(theseLabels[j]); 
558                 }
559                 allLabels.push_back(thisSet); 
560                 thisSet.clear();
561             }
562             fileLabels[combineFileName] = allLabels;
563                     
564             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
565             if (itfind != typesFiles.end()) {
566                 (itfind->second)[outputNames[i]] = file2Group[i];
567             }else {
568                 map<string, string> temp;  
569                 temp[outputNames[i]] = file2Group[i];
570                 typesFiles[extension] = temp;
571             }
572             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
573                 }
574                 
575                 //for each type create a combo file
576                 
577                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
578                         
579                         ofstream out;
580                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
581                         m->openOutputFileAppend(combineFileName, out);
582                         newFileNames.push_back(combineFileName);
583                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
584             set<int> numSampledSet;
585             
586                         //open each type summary file
587                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
588                         int maxLines = 0;
589                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
590                 
591                 string thisfilename = itFileNameGroup->first;
592                 string group = itFileNameGroup->second;
593                 
594                                 ifstream temp;
595                                 m->openInputFile(thisfilename, temp);
596                                 
597                                 //read through first line - labels
598                                 m->getline(temp);       m->gobble(temp);
599                                 
600                                 map<int, vector< vector<string> > > thisFilesLines;
601                                 while (!temp.eof()){
602                     int numSampled = 0;
603                     temp >> numSampled; m->gobble(temp);
604                 
605                     vector< vector<string> > theseReads;
606                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
607                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
608                         vector<string> reads;
609                         string next = "";
610                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
611                             temp >> next; m->gobble(temp);
612                             reads.push_back(next);
613                         }
614                         theseReads.push_back(reads);
615                     }
616                     thisFilesLines[numSampled] = theseReads;
617                     m->gobble(temp);
618                    
619                     numSampledSet.insert(numSampled);
620                                 }
621                                 
622                                 files[group] = thisFilesLines;
623                                 
624                                 //save longest file for below
625                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
626                                 
627                                 temp.close();
628                                 m->mothurRemove(thisfilename);
629                         }
630                         
631             //output new labels line
632             out << fileLabels[combineFileName][0][0] << '\t';
633             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
634                 for (int n = 0; n < groupNames.size(); n++) { // for each group
635                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
636                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
637                     }
638                 }
639             }
640                         out << endl;
641             
642                         //for each label
643                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
644                                 
645                 out << (*itNumSampled) << '\t';
646                                
647                 if (m->control_pressed) { break; }
648                 
649                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
650                                     //grab data for each group
651                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
652                         
653                         string group = itFileNameGroup->first;
654                        
655                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
656                         if (itLine != files[group].end()) { 
657                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
658                                 out << (itLine->second)[k][l] << '\t';
659                                
660                             }                             
661                         }else { 
662                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
663                                 out << "NA" << '\t';
664                             } 
665                         }
666                     }
667                 }
668                 out << endl;
669                         }       
670                         out.close();
671                 }
672                 
673                 //return combine file name
674                 return newFileNames;
675                 
676         }
677         catch(exception& e) {
678                 m->errorOut(e, "RareFactCommand", "createGroupFile");
679                 exit(1);
680         }
681 }
682 //**********************************************************************************************************************
683 vector<string> RareFactCommand::parseSharedFile(string filename, map<string, set<int> >& label2Ends) {
684         try {
685                 vector<string> filenames;
686                 
687                 map<string, ofstream*> filehandles;
688                 map<string, ofstream*>::iterator it3;
689                 
690                 input = new InputData(filename, "sharedfile");
691                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
692                 
693                 string sharedFileRoot = m->getRootName(filename);
694                 
695                 //clears file before we start to write to it below
696                 for (int i=0; i<lookup.size(); i++) {
697                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
698                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
699                 }
700                 
701                 ofstream* temp;
702                 for (int i=0; i<lookup.size(); i++) {
703                         temp = new ofstream;
704                         filehandles[lookup[i]->getGroup()] = temp;
705                         groups.push_back(lookup[i]->getGroup());
706                 }
707
708                 while(lookup[0] != NULL) {
709                 
710                         for (int i = 0; i < lookup.size(); i++) {
711                                 RAbundVector rav = lookup[i]->getRAbundVector();
712                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
713                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
714                                 (*(filehandles[lookup[i]->getGroup()])).close();
715                 label2Ends[lookup[i]->getLabel()].insert(rav.getNumSeqs());
716                         }
717                 
718                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
719                         lookup = input->getSharedRAbundVectors();
720                 }
721                 
722                 //free memory
723                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
724                         delete it3->second;
725                 }
726                 
727                 delete input;
728                 m->clearGroups();
729
730                 return filenames;
731         }
732         catch(exception& e) {
733                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
734                 exit(1);
735         }
736 }
737 //**********************************************************************************************************************
738
739
740