]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26
27
28 //**********************************************************************************************************************
29 vector<string> RareFactCommand::setParameters(){        
30         try {
31                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
32                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund);
33                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund);
34                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
35                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
36                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
37                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
38                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "",true,false); parameters.push_back(pcalc);
39                 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
40                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
41                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
42                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
43                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
44                 
45                 vector<string> myArray;
46                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
47                 return myArray;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "RareFactCommand", "setParameters");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string RareFactCommand::getHelpString(){        
56         try {
57                 ValidCalculators validCalculator;
58                 string helpString = "";
59                 helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, processors and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
60                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
61                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
62                 helpString += "The rarefaction.single command should be in the following format: \n";
63                 helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
64                 helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
65                 helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
66                 validCalculator.printCalc("rarefaction");
67                 helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
68                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
69                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
70                 return helpString;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "RareFactCommand", "getHelpString");
74                 exit(1);
75         }
76 }
77
78 //**********************************************************************************************************************
79 RareFactCommand::RareFactCommand(){     
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["rarefaction"] = tempOutNames;
85                 outputTypes["r_chao"] = tempOutNames;
86                 outputTypes["r_ace"] = tempOutNames;
87                 outputTypes["r_jack"] = tempOutNames;
88                 outputTypes["r_shannon"] = tempOutNames;
89                 outputTypes["r_shannoneven"] = tempOutNames;
90                 outputTypes["r_heip"] = tempOutNames;
91                 outputTypes["r_smithwilson"] = tempOutNames;
92                 outputTypes["r_npshannon"] = tempOutNames;
93                 outputTypes["r_simpson"] = tempOutNames;
94                 outputTypes["r_simpsoneven"] = tempOutNames;
95                 outputTypes["r_invsimpson"] = tempOutNames;
96                 outputTypes["r_bootstrap"] = tempOutNames;
97                 outputTypes["r_coverage"] = tempOutNames;
98                 outputTypes["r_nseqs"] = tempOutNames;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 RareFactCommand::RareFactCommand(string option)  {
107         try {
108                 abort = false; calledHelp = false;   
109                 allLines = 1;
110                                                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         map<string,string>::iterator it;
121                         
122                         ValidParameters validParameter;
123                 
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["rarefaction"] = tempOutNames;
132                         outputTypes["r_chao"] = tempOutNames;
133                         outputTypes["r_ace"] = tempOutNames;
134                         outputTypes["r_jack"] = tempOutNames;
135                         outputTypes["r_shannon"] = tempOutNames;
136                         outputTypes["r_shannoneven"] = tempOutNames;
137                         outputTypes["r_heip"] = tempOutNames;
138                         outputTypes["r_smithwilson"] = tempOutNames;
139                         outputTypes["r_npshannon"] = tempOutNames;
140                         outputTypes["r_simpson"] = tempOutNames;
141                         outputTypes["r_simpsoneven"] = tempOutNames;
142                         outputTypes["r_invsimpson"] = tempOutNames;
143                         outputTypes["r_bootstrap"] = tempOutNames;
144                         outputTypes["r_coverage"] = tempOutNames;
145                         outputTypes["r_nseqs"] = tempOutNames;
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("shared");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
158                                 }
159                                 
160                                 it = parameters.find("rabund");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
166                                 }
167                                 
168                                 it = parameters.find("sabund");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
174                                 }
175                                 
176                                 it = parameters.find("list");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
182                                 }
183                         }
184                         
185                         //check for required parameters
186                         listfile = validParameter.validFile(parameters, "list", true);
187                         if (listfile == "not open") { listfile = ""; abort = true; }
188                         else if (listfile == "not found") { listfile = ""; }
189                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
190                         
191                         sabundfile = validParameter.validFile(parameters, "sabund", true);
192                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
193                         else if (sabundfile == "not found") { sabundfile = ""; }
194                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
195                         
196                         rabundfile = validParameter.validFile(parameters, "rabund", true);
197                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
198                         else if (rabundfile == "not found") { rabundfile = ""; }
199                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
200                         
201                         sharedfile = validParameter.validFile(parameters, "shared", true);
202                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
203                         else if (sharedfile == "not found") { sharedfile = ""; }
204                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
205                                 
206                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
207                                 //is there are current file available for any of these?
208                                 //give priority to shared, then list, then rabund, then sabund
209                                 //if there is a current shared file, use it
210                                 sharedfile = m->getSharedFile(); 
211                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
212                                 else { 
213                                         listfile = m->getListFile(); 
214                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
215                                         else { 
216                                                 rabundfile = m->getRabundFile(); 
217                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
218                                                 else { 
219                                                         sabundfile = m->getSabundFile(); 
220                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
221                                                         else { 
222                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
223                                                                 abort = true;
224                                                         }
225                                                 }
226                                         }
227                                 }
228                         }
229                         
230                         //if the user changes the output directory command factory will send this info to us in the output parameter 
231                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
232
233                         //check for optional parameter and set defaults
234                         // ...at some point should added some additional type checking...
235                         label = validParameter.validFile(parameters, "label", false);                   
236                         if (label == "not found") { label = ""; }
237                         else { 
238                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
239                                 else { allLines = 1;  }
240                         }
241                                 
242                         calc = validParameter.validFile(parameters, "calc", false);                     
243                         if (calc == "not found") { calc = "sobs";  }
244                         else { 
245                                  if (calc == "default")  {  calc = "sobs";  }
246                         }
247                         m->splitAtDash(calc, Estimators);
248                         if (m->inUsersGroups("citation", Estimators)) { 
249                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
250                                 //remove citation from list of calcs
251                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
252                         }
253
254                         string temp;
255                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
256                         m->mothurConvert(temp, freq); 
257                         
258                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
259                         m->mothurConvert(temp, abund); 
260                         
261                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
262                         m->mothurConvert(temp, nIters); 
263                         
264                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
265                         m->setProcessors(temp);
266                         m->mothurConvert(temp, processors);
267                         
268                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
269                         groupMode = m->isTrue(temp);
270                 }
271                 
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279
280 int RareFactCommand::execute(){
281         try {
282         
283                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
284                 
285         map<string, set<int> > labelToEnds;
286                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
287                 else {  inputFileNames = parseSharedFile(sharedfile, labelToEnds);  format = "rabund"; }
288         
289         if (m->control_pressed) { return 0; }
290                 
291                 map<int, string> file2Group; //index in outputNames[i] -> group
292                 for (int p = 0; p < inputFileNames.size(); p++) {
293                         
294                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
295                                                 
296                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }  m->clearGroups();  return 0; }
297                         
298                         if (inputFileNames.size() > 1) {
299                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
300                         }
301                         int i;
302                         ValidCalculators validCalculator;
303                         
304                           
305                         for (i=0; i<Estimators.size(); i++) {
306                                 if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { 
307                                         if (Estimators[i] == "sobs") { 
308                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
309                                                 outputNames.push_back(fileNameRoot+"rarefaction"); outputTypes["rarefaction"].push_back(fileNameRoot+"rarefaction");
310                                         }else if (Estimators[i] == "chao") { 
311                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
312                                                 outputNames.push_back(fileNameRoot+"r_chao"); outputTypes["r_chao"].push_back(fileNameRoot+"r_chao");
313                                         }else if (Estimators[i] == "ace") { 
314                                                 if(abund < 5)
315                                                         abund = 10;
316                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
317                                                 outputNames.push_back(fileNameRoot+"r_ace"); outputTypes["r_ace"].push_back(fileNameRoot+"r_ace");
318                                         }else if (Estimators[i] == "jack") { 
319                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
320                                                 outputNames.push_back(fileNameRoot+"r_jack"); outputTypes["r_jack"].push_back(fileNameRoot+"r_jack");
321                                         }else if (Estimators[i] == "shannon") { 
322                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
323                                                 outputNames.push_back(fileNameRoot+"r_shannon"); outputTypes["r_shannon"].push_back(fileNameRoot+"r_shannon");
324                                         }else if (Estimators[i] == "shannoneven") { 
325                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+"r_shannoneven")));
326                                                 outputNames.push_back(fileNameRoot+"r_shannoneven"); outputTypes["r_shannoneven"].push_back(fileNameRoot+"r_shannoneven");
327                                         }else if (Estimators[i] == "heip") { 
328                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+"r_heip")));
329                                                 outputNames.push_back(fileNameRoot+"r_heip"); outputTypes["r_heip"].push_back(fileNameRoot+"r_heip");
330                                         }else if (Estimators[i] == "smithwilson") { 
331                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+"r_smithwilson")));
332                                                 outputNames.push_back(fileNameRoot+"r_smithwilson"); outputTypes["r_smithwilson"].push_back(fileNameRoot+"r_smithwilson");
333                                         }else if (Estimators[i] == "npshannon") { 
334                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
335                                                 outputNames.push_back(fileNameRoot+"r_npshannon"); outputTypes["r_npshannon"].push_back(fileNameRoot+"r_npshannon");
336                                         }else if (Estimators[i] == "simpson") { 
337                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
338                                                 outputNames.push_back(fileNameRoot+"r_simpson"); outputTypes["r_simpson"].push_back(fileNameRoot+"r_simpson");
339                                         }else if (Estimators[i] == "simpsoneven") { 
340                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+"r_simpsoneven")));
341                                                 outputNames.push_back(fileNameRoot+"r_simpsoneven"); outputTypes["r_simpsoneven"].push_back(fileNameRoot+"r_simpsoneven");
342                                         }else if (Estimators[i] == "invsimpson") { 
343                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"r_invsimpson")));
344                                                 outputNames.push_back(fileNameRoot+"r_invsimpson"); outputTypes["r_invsimpson"].push_back(fileNameRoot+"r_invsimpson");
345                                         }else if (Estimators[i] == "bootstrap") { 
346                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
347                                                 outputNames.push_back(fileNameRoot+"r_bootstrap"); outputTypes["r_bootstrap"].push_back(fileNameRoot+"r_bootstrap");
348                                         }else if (Estimators[i] == "coverage") { 
349                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
350                                                 outputNames.push_back(fileNameRoot+"r_coverage"); outputTypes["r_coverage"].push_back(fileNameRoot+"r_coverage");
351                                         }else if (Estimators[i] == "nseqs") { 
352                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
353                                                 outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs");
354                                         }
355                     if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
356                                 }
357                         }
358                         
359                         
360                         //if the users entered no valid calculators don't execute command
361                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }  return 0; }
362                         
363                         input = new InputData(inputFileNames[p], format);                       
364                         order = input->getOrderVector();
365                         string lastLabel = order->getLabel();
366                         
367                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
368                         set<string> processedLabels;
369                         set<string> userLabels = labels;
370                         
371                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
372                         
373                         //as long as you are not at the end of the file or done wih the lines you want
374                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
375                                 
376                                 if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
377
378                                 
379                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
380                                         
381                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
382                     map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
383                     set<int> ends;
384                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
385                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
386                                         rCurve->getCurve(freq, nIters);
387                                         delete rCurve;
388                                         
389                                         processedLabels.insert(order->getLabel());
390                                         userLabels.erase(order->getLabel());
391                                 }
392                                 
393                                 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
394                                         string saveLabel = order->getLabel();
395                                         
396                                         delete order;
397                                         order = (input->getOrderVector(lastLabel));
398                                         
399                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
400                                         map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
401                     set<int> ends;
402                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
403                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
404
405                                         rCurve->getCurve(freq, nIters);
406                                         delete rCurve;
407                                         
408                                         processedLabels.insert(order->getLabel());
409                                         userLabels.erase(order->getLabel());
410                                         
411                                         //restore real lastlabel to save below
412                                         order->setLabel(saveLabel);
413                                 }
414                                 
415                                 lastLabel = order->getLabel();          
416                                 
417                                 delete order;
418                                 order = (input->getOrderVector());
419                         }
420                         
421                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
422
423                         //output error messages about any remaining user labels
424                         set<string>::iterator it;
425                         bool needToRun = false;
426                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
427                                 m->mothurOut("Your file does not include the label " + *it);
428                                 if (processedLabels.count(lastLabel) != 1) {
429                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
430                                         needToRun = true;
431                                 }else {
432                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
433                                 }
434                         }
435                         
436                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
437
438                         //run last label if you need to
439                         if (needToRun == true)  {
440                                 if (order != NULL) {    delete order;   }
441                                 order = (input->getOrderVector(lastLabel));
442                                 
443                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
444                                 map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
445                 set<int> ends;
446                 if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
447                 rCurve = new Rarefact(order, rDisplays, processors, ends);
448
449                                 rCurve->getCurve(freq, nIters);
450                                 delete rCurve;
451                                 
452                                 delete order;
453                         }
454                         
455                         
456                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
457                         rDisplays.clear();
458                         delete input;  
459                 }
460                 
461                 
462                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
463
464                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
465                 if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
466
467                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
468
469                 m->mothurOutEndLine();
470                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
471                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
472                 m->mothurOutEndLine();
473
474                 return 0;
475         }
476         catch(exception& e) {
477                 m->errorOut(e, "RareFactCommand", "execute");
478                 exit(1);
479         }
480 }
481 //**********************************************************************************************************************
482 vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
483         try {
484                 
485                 vector<string> newFileNames;
486                 
487                 //find different types of files
488                 map<string, map<string, string> > typesFiles;
489         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
490         vector<string> groupNames;
491                 for (int i = 0; i < outputNames.size(); i++) {
492             
493                         string extension = m->getExtension(outputNames[i]);
494             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
495                         m->mothurRemove(combineFileName); //remove old file
496             
497                         ifstream in;
498                         m->openInputFile(outputNames[i], in);
499                         
500                         string labels = m->getline(in);
501             
502                         istringstream iss (labels,istringstream::in);
503             string newLabel = ""; vector<string> theseLabels;
504             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
505             vector< vector<string> > allLabels;
506             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
507             for (int j = 1; j < theseLabels.size()-1; j++) {
508                 if (theseLabels[j+1] == "lci") {
509                     thisSet.push_back(theseLabels[j]); 
510                     thisSet.push_back(theseLabels[j+1]); 
511                     thisSet.push_back(theseLabels[j+2]);
512                     j++; j++;
513                 }else{ //no lci or hci for this calc.
514                     thisSet.push_back(theseLabels[j]); 
515                 }
516                 allLabels.push_back(thisSet); 
517                 thisSet.clear();
518             }
519             fileLabels[combineFileName] = allLabels;
520                     
521             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
522             if (itfind != typesFiles.end()) {
523                 (itfind->second)[outputNames[i]] = file2Group[i];
524             }else {
525                 map<string, string> temp;  
526                 temp[outputNames[i]] = file2Group[i];
527                 typesFiles[extension] = temp;
528             }
529             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
530                 }
531                 
532                 //for each type create a combo file
533                 
534                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
535                         
536                         ofstream out;
537                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
538                         m->openOutputFileAppend(combineFileName, out);
539                         newFileNames.push_back(combineFileName);
540                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
541             set<int> numSampledSet;
542             
543                         //open each type summary file
544                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
545                         int maxLines = 0;
546                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
547                 
548                 string thisfilename = itFileNameGroup->first;
549                 string group = itFileNameGroup->second;
550                 
551                                 ifstream temp;
552                                 m->openInputFile(thisfilename, temp);
553                                 
554                                 //read through first line - labels
555                                 m->getline(temp);       m->gobble(temp);
556                                 
557                                 map<int, vector< vector<string> > > thisFilesLines;
558                                 while (!temp.eof()){
559                     int numSampled = 0;
560                     temp >> numSampled; m->gobble(temp);
561                 
562                     vector< vector<string> > theseReads;
563                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
564                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
565                         vector<string> reads;
566                         string next = "";
567                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
568                             temp >> next; m->gobble(temp);
569                             reads.push_back(next);
570                         }
571                         theseReads.push_back(reads);
572                     }
573                     thisFilesLines[numSampled] = theseReads;
574                     m->gobble(temp);
575                    
576                     numSampledSet.insert(numSampled);
577                                 }
578                                 
579                                 files[group] = thisFilesLines;
580                                 
581                                 //save longest file for below
582                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
583                                 
584                                 temp.close();
585                                 m->mothurRemove(thisfilename);
586                         }
587                         
588             //output new labels line
589             out << fileLabels[combineFileName][0][0] << '\t';
590             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
591                 for (int n = 0; n < groupNames.size(); n++) { // for each group
592                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
593                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
594                     }
595                 }
596             }
597                         out << endl;
598             
599                         //for each label
600                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
601                                 
602                 out << (*itNumSampled) << '\t';
603                                
604                 if (m->control_pressed) { break; }
605                 
606                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
607                                     //grab data for each group
608                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
609                         
610                         string group = itFileNameGroup->first;
611                        
612                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
613                         if (itLine != files[group].end()) { 
614                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
615                                 out << (itLine->second)[k][l] << '\t';
616                                
617                             }                             
618                         }else { 
619                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
620                                 out << "NA" << '\t';
621                             } 
622                         }
623                     }
624                 }
625                 out << endl;
626                         }       
627                         out.close();
628                 }
629                 
630                 //return combine file name
631                 return newFileNames;
632                 
633         }
634         catch(exception& e) {
635                 m->errorOut(e, "RareFactCommand", "createGroupFile");
636                 exit(1);
637         }
638 }
639 //**********************************************************************************************************************
640 vector<string> RareFactCommand::parseSharedFile(string filename, map<string, set<int> >& label2Ends) {
641         try {
642                 vector<string> filenames;
643                 
644                 map<string, ofstream*> filehandles;
645                 map<string, ofstream*>::iterator it3;
646                 
647                 input = new InputData(filename, "sharedfile");
648                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
649                 
650                 string sharedFileRoot = m->getRootName(filename);
651                 
652                 //clears file before we start to write to it below
653                 for (int i=0; i<lookup.size(); i++) {
654                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
655                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
656                 }
657                 
658                 ofstream* temp;
659                 for (int i=0; i<lookup.size(); i++) {
660                         temp = new ofstream;
661                         filehandles[lookup[i]->getGroup()] = temp;
662                         groups.push_back(lookup[i]->getGroup());
663                 }
664
665                 while(lookup[0] != NULL) {
666                 
667                         for (int i = 0; i < lookup.size(); i++) {
668                                 RAbundVector rav = lookup[i]->getRAbundVector();
669                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
670                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
671                                 (*(filehandles[lookup[i]->getGroup()])).close();
672                 label2Ends[lookup[i]->getLabel()].insert(rav.getNumSeqs());
673                         }
674                 
675                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
676                         lookup = input->getSharedRAbundVectors();
677                 }
678                 
679                 //free memory
680                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
681                         delete it3->second;
682                 }
683                 
684                 delete input;
685                 m->clearGroups();
686
687                 return filenames;
688         }
689         catch(exception& e) {
690                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
691                 exit(1);
692         }
693 }
694 //**********************************************************************************************************************
695
696
697