]> git.donarmstrong.com Git - mothur.git/blob - summarycommand.cpp
added shannonrange calc.
[mothur.git] / summarycommand.cpp
1 /*
2  *  summarycommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarycommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "invsimpson.h"
19 #include "npshannon.h"
20 #include "shannon.h"
21 #include "heip.h"
22 #include "smithwilson.h"
23 #include "shannoneven.h"
24 #include "jackknife.h"
25 #include "geom.h"
26 #include "logsd.h"
27 #include "qstat.h"
28 #include "bergerparker.h"
29 #include "bstick.h"
30 #include "goodscoverage.h"
31 #include "coverage.h"
32 #include "efron.h"
33 #include "boneh.h"
34 #include "solow.h"
35 #include "shen.h"
36 #include "subsample.h"
37 #include "shannonrange.h"
38
39 //**********************************************************************************************************************
40 vector<string> SummaryCommand::setParameters(){ 
41         try {
42                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(plist);
43                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(prabund);
44                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(psabund);
45                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(pshared);
46         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
47         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
48                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
49                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc);
50                 CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha);
51         CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
52                 CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize);
53                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
54                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
55                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
56                 
57                 vector<string> myArray;
58                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummaryCommand", "setParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummaryCommand::getHelpString(){ 
68         try {
69                 string helpString = "";
70                 ValidCalculators validCalculator;
71                 helpString += "The summary.single command parameters are list, sabund, rabund, shared, subsample, iters, label, calc, abund and groupmode.  list, sabund, rabund or shared is required unless you have a valid current file.\n";
72                 helpString += "The summary.single command should be in the following format: \n";
73                 helpString += "summary.single(label=yourLabel, calc=yourEstimators).\n";
74                 helpString += "Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n";
75                 helpString += validCalculator.printCalc("summary");
76         helpString += "The subsample parameter allows you to enter the size of the sample or you can set subsample=T and mothur will use the size of your smallest group in the case of a shared file. With a list, sabund or rabund file you must provide a subsample size.\n";
77         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
78                 helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n";
79                 helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n";
80         helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n";
81                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
82                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n";
83                 return helpString;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "SummaryCommand", "getHelpString");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 string SummaryCommand::getOutputPattern(string type) {
92     try {
93         string pattern = "";
94         
95         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
96         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
97         
98         return pattern;
99     }
100     catch(exception& e) {
101         m->errorOut(e, "SummaryCommand", "getOutputPattern");
102         exit(1);
103     }
104 }
105 //**********************************************************************************************************************
106 SummaryCommand::SummaryCommand(){       
107         try {
108                 abort = true; calledHelp = true; 
109                 setParameters();
110                 vector<string> tempOutNames;
111                 outputTypes["summary"] = tempOutNames;
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
115                 exit(1);
116         }
117 }
118 //**********************************************************************************************************************
119
120 SummaryCommand::SummaryCommand(string option)  {
121         try {
122                 abort = false; calledHelp = false;   
123                 allLines = 1;
124                                 
125                 //allow user to run help
126                 if(option == "help") {  help();  abort = true; calledHelp = true; }
127                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
128                 
129                 else {
130                         vector<string> myArray = setParameters();
131                         
132                         OptionParser parser(option);
133                         map<string,string> parameters = parser.getParameters();
134                         map<string,string>::iterator it;
135                         
136                         ValidParameters validParameter;
137                         
138                         //check to make sure all parameters are valid for command
139                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
140                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
141                         }
142                         
143                         //initialize outputTypes
144                         vector<string> tempOutNames;
145                         outputTypes["summary"] = tempOutNames;
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("shared");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
158                                 }
159                                 
160                                 it = parameters.find("rabund");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
166                                 }
167                                 
168                                 it = parameters.find("sabund");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
174                                 }
175                                 
176                                 it = parameters.find("list");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
182                                 }
183                         }
184                         
185                         //check for required parameters
186                         listfile = validParameter.validFile(parameters, "list", true);
187                         if (listfile == "not open") { listfile = ""; abort = true; }
188                         else if (listfile == "not found") { listfile = ""; }
189                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
190                         
191                         sabundfile = validParameter.validFile(parameters, "sabund", true);
192                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
193                         else if (sabundfile == "not found") { sabundfile = ""; }
194                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
195                         
196                         rabundfile = validParameter.validFile(parameters, "rabund", true);
197                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
198                         else if (rabundfile == "not found") { rabundfile = ""; }
199                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
200                         
201                         sharedfile = validParameter.validFile(parameters, "shared", true);
202                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
203                         else if (sharedfile == "not found") { sharedfile = ""; }
204                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
205                         
206                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
207                                 //is there are current file available for any of these?
208                                 //give priority to shared, then list, then rabund, then sabund
209                                 //if there is a current shared file, use it
210                                 sharedfile = m->getSharedFile(); 
211                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
212                                 else { 
213                                         listfile = m->getListFile(); 
214                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
215                                         else { 
216                                                 rabundfile = m->getRabundFile(); 
217                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
218                                                 else { 
219                                                         sabundfile = m->getSabundFile(); 
220                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
221                                                         else { 
222                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
223                                                                 abort = true;
224                                                         }
225                                                 }
226                                         }
227                                 }
228                         }
229                         
230                         //if the user changes the output directory command factory will send this info to us in the output parameter 
231                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
232
233                         //check for optional parameter and set defaults
234                         // ...at some point should added some additional type checking...
235                         label = validParameter.validFile(parameters, "label", false);                   
236                         if (label == "not found") { label = ""; }
237                         else { 
238                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
239                                 else { allLines = 1;  }
240                         }
241                                 
242                         calc = validParameter.validFile(parameters, "calc", false);                     
243                         if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
244                         else { 
245                                  if (calc == "default")  {  calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
246                         }
247                         m->splitAtDash(calc, Estimators);
248                         if (m->inUsersGroups("citation", Estimators)) { 
249                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
250                                 //remove citation from list of calcs
251                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
252                         }
253
254                         string temp;
255                         temp = validParameter.validFile(parameters, "abund", false);            if (temp == "not found") { temp = "10"; }
256                         m->mothurConvert(temp, abund); 
257                         
258                         temp = validParameter.validFile(parameters, "size", false);                     if (temp == "not found") { temp = "0"; }
259                         m->mothurConvert(temp, size); 
260                         
261                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
262                         groupMode = m->isTrue(temp);
263                         
264             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
265                         m->mothurConvert(temp, iters);
266             
267             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
268                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
269             else {  
270                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
271                 else { subsample = false; subsampleSize = -1; }
272             }
273             
274             temp = validParameter.validFile(parameters, "alpha", false);                if (temp == "not found") { temp = "1"; }
275                         m->mothurConvert(temp, alpha);
276             
277             if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; }
278             
279             if (subsample == false) { iters = 0; }
280             else {
281                 //if you did not set a samplesize and are not using a sharedfile
282                 if ((subsampleSize == -1) && (format != "sharedfile"))  { m->mothurOut("[ERROR]: If you want to subsample with a list, rabund or sabund file, you must provide the sample size.  You can do this by setting subsample=yourSampleSize.\n");  abort=true; }
283             }
284
285                 }
286         }
287         catch(exception& e) {
288                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
289                 exit(1);
290         }
291 }
292 //**********************************************************************************************************************
293
294 int SummaryCommand::execute(){
295         try {
296         
297                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
298                 
299                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
300                 else {  inputFileNames = parseSharedFile(sharedfile);  format = "rabund"; }
301                 
302                 if (m->control_pressed) { return 0; }
303                 
304                 int numLines = 0;
305                 int numCols = 0;
306                 map<string, string> groupIndex;
307         
308                 for (int p = 0; p < inputFileNames.size(); p++) {
309                         
310                         numLines = 0;
311                         numCols = 0;
312                         
313             map<string, string> variables; 
314             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
315                         string fileNameRoot = getOutputFileName("summary",variables);
316             variables["[tag]"] = "ave-std";
317             string fileNameAve = getOutputFileName("summary",variables);
318             outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot);
319             
320                         if (inputFileNames.size() > 1) {
321                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
322                 groupIndex[fileNameRoot] = groups[p];
323                         }
324                         
325                         sumCalculators.clear();
326                         
327                         ValidCalculators validCalculator;
328                         
329                         for (int i=0; i<Estimators.size(); i++) {
330                                 if (validCalculator.isValidCalculator("summary", Estimators[i]) == true) { 
331                                         if(Estimators[i] == "sobs"){
332                                                 sumCalculators.push_back(new Sobs());
333                                         }else if(Estimators[i] == "chao"){
334                                                 sumCalculators.push_back(new Chao1());
335                                         }else if(Estimators[i] == "coverage"){
336                                                 sumCalculators.push_back(new Coverage());
337                                         }else if(Estimators[i] == "geometric"){
338                                                 sumCalculators.push_back(new Geom());
339                                         }else if(Estimators[i] == "logseries"){
340                                                 sumCalculators.push_back(new LogSD());
341                                         }else if(Estimators[i] == "qstat"){
342                                                 sumCalculators.push_back(new QStat());
343                                         }else if(Estimators[i] == "bergerparker"){
344                                                 sumCalculators.push_back(new BergerParker());
345                                         }else if(Estimators[i] == "bstick"){
346                                                 sumCalculators.push_back(new BStick());
347                                         }else if(Estimators[i] == "ace"){
348                                                 if(abund < 5)
349                                                         abund = 10;
350                                                 sumCalculators.push_back(new Ace(abund));
351                                         }else if(Estimators[i] == "jack"){
352                                                 sumCalculators.push_back(new Jackknife());
353                                         }else if(Estimators[i] == "shannon"){
354                                                 sumCalculators.push_back(new Shannon());
355                                         }else if(Estimators[i] == "shannoneven"){
356                                                 sumCalculators.push_back(new ShannonEven());
357                     }else if(Estimators[i] == "shannonrange"){
358                                                 sumCalculators.push_back(new RangeShannon(alpha));
359                                         }else if(Estimators[i] == "npshannon"){
360                                                 sumCalculators.push_back(new NPShannon());
361                                         }else if(Estimators[i] == "heip"){
362                                                 sumCalculators.push_back(new Heip());
363                                         }else if(Estimators[i] == "smithwilson"){
364                                                 sumCalculators.push_back(new SmithWilson());
365                                         }else if(Estimators[i] == "simpson"){
366                                                 sumCalculators.push_back(new Simpson());
367                                         }else if(Estimators[i] == "simpsoneven"){
368                                                 sumCalculators.push_back(new SimpsonEven());
369                                         }else if(Estimators[i] == "invsimpson"){
370                                                 sumCalculators.push_back(new InvSimpson());
371                                         }else if(Estimators[i] == "bootstrap"){
372                                                 sumCalculators.push_back(new Bootstrap());
373                                         }else if (Estimators[i] == "nseqs") { 
374                                                 sumCalculators.push_back(new NSeqs());
375                                         }else if (Estimators[i] == "goodscoverage") { 
376                                                 sumCalculators.push_back(new GoodsCoverage());
377                                         }else if (Estimators[i] == "efron") { 
378                                                 sumCalculators.push_back(new Efron(size));
379                                         }else if (Estimators[i] == "boneh") { 
380                                                 sumCalculators.push_back(new Boneh(size));
381                                         }else if (Estimators[i] == "solow") { 
382                                                 sumCalculators.push_back(new Solow(size));
383                                         }else if (Estimators[i] == "shen") { 
384                                                 sumCalculators.push_back(new Shen(size, abund));
385                                         }
386                                 }
387                         }
388                         
389                         //if the users entered no valid calculators don't execute command
390                         if (sumCalculators.size() == 0) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
391                         
392                         ofstream outputFileHandle;
393                         m->openOutputFile(fileNameRoot, outputFileHandle);
394                         outputFileHandle << "label";
395             
396             ofstream outAve;
397             if (subsample) {
398                 m->openOutputFile(fileNameAve, outAve);
399                 outputNames.push_back(fileNameAve); outputTypes["summary"].push_back(fileNameAve);
400                 outAve << "label\tmethod"; 
401                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
402                 if (inputFileNames.size() > 1) {
403                     groupIndex[fileNameAve] = groups[p];
404                 }
405             }
406                 
407                         input = new InputData(inputFileNames[p], format);
408                         sabund = input->getSAbundVector();
409                         string lastLabel = sabund->getLabel();
410                 
411                         for(int i=0;i<sumCalculators.size();i++){
412                                 if(sumCalculators[i]->getCols() == 1){
413                                         outputFileHandle << '\t' << sumCalculators[i]->getName();
414                     if (subsample) { outAve << '\t' << sumCalculators[i]->getName();  }
415                                         numCols++;
416                                 }
417                                 else{
418                                         outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
419                     if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
420                                         numCols += 3;
421                                 }
422                         }
423                         outputFileHandle << endl;
424             if (subsample) {  outAve << endl; }
425                         
426                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
427                         set<string> processedLabels;
428                         set<string> userLabels = labels;
429                         
430             
431             
432                         if (m->control_pressed) {  outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
433                         
434                         while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
435                                 
436                                 if (m->control_pressed) { outputFileHandle.close(); outAve.close();  for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
437                                 
438                                 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){                     
439                                         
440                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
441                                         processedLabels.insert(sabund->getLabel());
442                                         userLabels.erase(sabund->getLabel());
443                                         
444                     process(sabund, outputFileHandle, outAve);
445                     
446                     if (m->control_pressed) { outputFileHandle.close(); outAve.close();  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
447                                         numLines++;
448                                 }
449                                 
450                                 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
451                                         string saveLabel = sabund->getLabel();
452                                         
453                                         delete sabund;
454                                         sabund = input->getSAbundVector(lastLabel);
455                                         
456                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
457                                         processedLabels.insert(sabund->getLabel());
458                                         userLabels.erase(sabund->getLabel());
459                                         
460                     process(sabund, outputFileHandle, outAve);
461                     
462                     if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
463                                         numLines++;
464                                         
465                                         //restore real lastlabel to save below
466                                         sabund->setLabel(saveLabel);
467                                 }               
468                                 
469                                 lastLabel = sabund->getLabel();                 
470                                 
471                                 delete sabund;
472                                 sabund = input->getSAbundVector();
473                         }
474                         
475                         if (m->control_pressed) {  outputFileHandle.close(); outAve.close();  for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }   delete input;  return 0;  }
476
477                         //output error messages about any remaining user labels
478                         set<string>::iterator it;
479                         bool needToRun = false;
480                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
481                                 m->mothurOut("Your file does not include the label " + *it); 
482                                 if (processedLabels.count(lastLabel) != 1) {
483                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
484                                         needToRun = true;
485                                 }else {
486                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
487                                 }
488                         }
489                         
490                         //run last label if you need to
491                         if (needToRun == true)  {
492                                 if (sabund != NULL) {   delete sabund;  }
493                                 sabund = input->getSAbundVector(lastLabel);
494                                 
495                                 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
496                 process(sabund, outputFileHandle, outAve);
497                 
498                 if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
499                                 numLines++;
500                                 delete sabund;
501                         }
502                         
503                         outputFileHandle.close();
504             if (subsample) { outAve.close(); }
505                         
506                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }   delete input;  return 0;  }
507
508                         
509                         delete input;  
510                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
511                 }
512                 
513                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0;  }
514                 
515                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
516                 if ((sharedfile != "") && (groupMode)) {   vector<string> comboNames = createGroupSummaryFile(numLines, numCols, outputNames, groupIndex);  for (int i = 0; i < comboNames.size(); i++) { outputNames.push_back(comboNames[i]); } }
517                 
518                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0;  }
519                 
520                 m->mothurOutEndLine();
521                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
522                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
523                 m->mothurOutEndLine();
524                 
525                 return 0;
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "SummaryCommand", "execute");
529                 exit(1);
530         }
531 }
532 //**********************************************************************************************************************
533 int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve) {
534     try {
535         
536         //calculator -> data -> values
537         vector< vector< vector<double> > >  results; results.resize(sumCalculators.size());
538         
539         outputFileHandle << sabund->getLabel();
540         
541         SubSample sample;
542         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
543             
544             SAbundVector* thisIterSabund = sabund;
545             
546             //we want the summary results for the whole dataset, then the subsampling
547             if ((thisIter > 0) && subsample) { //subsample sabund and run it
548                 //copy sabund since getSample destroys it
549                 RAbundVector rabund = sabund->getRAbundVector();
550                 SAbundVector* newSabund = new SAbundVector();
551                 *newSabund = rabund.getSAbundVector();
552                 
553                 sample.getSample(newSabund, subsampleSize);
554                 thisIterSabund = newSabund;
555             }
556             
557             for(int i=0;i<sumCalculators.size();i++){
558                 vector<double> data = sumCalculators[i]->getValues(thisIterSabund);
559                
560                 if (m->control_pressed) {  return 0;  }
561                 
562                 if (thisIter == 0) {
563                     outputFileHandle << '\t';
564                     sumCalculators[i]->print(outputFileHandle);
565                 }else {
566                     //some of the calc have hci and lci need to make room for that
567                     if (results[i].size() == 0) {  results[i].resize(data.size());  }
568                     //save results for ave and std.
569                     for (int j = 0; j < data.size(); j++) {
570                         if (m->control_pressed) {  return 0;  }
571                         results[i][j].push_back(data[j]); 
572                     }
573                 }
574             }
575             
576             //cleanup memory
577             if ((thisIter > 0) && subsample) { delete thisIterSabund; }
578         }
579         outputFileHandle << endl;
580      
581         if (subsample) {
582             outAve << sabund->getLabel() << '\t' << "ave\t"; 
583             //find ave and std for this label and output
584             //will need to modify the createGroupSummary to combine results and not mess with the .summary file.
585             
586             //calcs -> values
587             vector< vector<double> >  calcAverages; calcAverages.resize(sumCalculators.size()); 
588             for (int i = 0; i < calcAverages.size(); i++) {  calcAverages[i].resize(results[i].size(), 0);  }
589             
590             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
591                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
592                     for (int j = 0; j < calcAverages[i].size(); j++) {
593                         calcAverages[i][j] += results[i][j][thisIter];
594                     }
595                 }
596             }
597             
598             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
599                 for (int j = 0; j < calcAverages[i].size(); j++) {
600                     calcAverages[i][j] /= (float) iters;
601                     outAve << calcAverages[i][j] << '\t';
602                 }
603             }
604             
605             //find standard deviation
606             vector< vector<double>  > stdDev; stdDev.resize(sumCalculators.size());
607             for (int i = 0; i < stdDev.size(); i++) {  stdDev[i].resize(results[i].size(), 0);  }
608             
609             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
610                 for (int i = 0; i < stdDev.size(); i++) {  
611                     for (int j = 0; j < stdDev[i].size(); j++) {
612                         stdDev[i][j] += ((results[i][j][thisIter] - calcAverages[i][j]) * (results[i][j][thisIter] - calcAverages[i][j]));
613                     }
614                 }
615             }
616             
617             outAve << endl << sabund->getLabel() << '\t' << "std\t"; 
618             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
619                 for (int j = 0; j < stdDev[i].size(); j++) {
620                     stdDev[i][j] /= (float) iters;
621                     stdDev[i][j] = sqrt(stdDev[i][j]);
622                     outAve << stdDev[i][j] << '\t';
623                 }
624             }
625             outAve << endl;  
626         }
627         
628         return 0;
629     }
630     catch(exception& e) {
631         m->errorOut(e, "SummaryCommand", "process");
632         exit(1);
633     }
634 }
635 //**********************************************************************************************************************
636 vector<string> SummaryCommand::parseSharedFile(string filename) {
637         try {
638                 vector<string> filenames;
639                 
640                 map<string, ofstream*> filehandles;
641                 map<string, ofstream*>::iterator it3;
642                 
643                 input = new InputData(filename, "sharedfile");
644                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
645                 
646                 string sharedFileRoot = m->getRootName(filename);
647                 
648         /******************************************************/
649         if (subsample) { 
650             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
651                 subsampleSize = lookup[0]->getNumSeqs();
652                 for (int i = 1; i < lookup.size(); i++) {
653                     int thisSize = lookup[i]->getNumSeqs();
654                     
655                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
656                 }
657             }else {
658                 m->clearGroups();
659                 vector<string> Groups;
660                 vector<SharedRAbundVector*> temp;
661                 for (int i = 0; i < lookup.size(); i++) {
662                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
663                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
664                         delete lookup[i];
665                     }else { 
666                         Groups.push_back(lookup[i]->getGroup()); 
667                         temp.push_back(lookup[i]);
668                     }
669                 } 
670                 lookup = temp;
671                 m->setGroups(Groups);
672             }
673             
674             if (lookup.size() < 1) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return filenames; }
675         }
676         
677                 
678                 /******************************************************/
679         
680         //clears file before we start to write to it below
681                 for (int i=0; i<lookup.size(); i++) {
682                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
683                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
684                 }
685         
686                 ofstream* temp;
687                 for (int i=0; i<lookup.size(); i++) {
688                         temp = new ofstream;
689                         filehandles[lookup[i]->getGroup()] = temp;
690                         groups.push_back(lookup[i]->getGroup());
691                 }
692
693                 while(lookup[0] != NULL) {
694                 
695                         for (int i = 0; i < lookup.size(); i++) {
696                                 RAbundVector rav = lookup[i]->getRAbundVector();
697                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
698                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
699                                 (*(filehandles[lookup[i]->getGroup()])).close();
700                         }
701                 
702                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
703                         lookup = input->getSharedRAbundVectors();
704                 }
705                 
706                 //free memory
707                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
708                         delete it3->second;
709                 }
710                 
711                 delete input;
712
713                 return filenames;
714         }
715         catch(exception& e) {
716                 m->errorOut(e, "SummaryCommand", "parseSharedFile");
717                 exit(1);
718         }
719 }
720 //**********************************************************************************************************************
721 vector<string> SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames, map<string, string> groupIndex) {
722         try {
723                                 
724                 //open each groups summary file
725         vector<string> newComboNames;
726                 
727                 map<string, map<string, vector<string> > > files;
728         map<string, string> filesTypesLabels;
729         map<string, int> filesTypesNumLines;
730                 for (int i=0; i<outputNames.size(); i++) {
731                         vector<string> thisFilesLines;
732             
733                         ifstream temp;
734                         m->openInputFile(outputNames[i], temp);
735                         
736                         //read through first line - labels
737             string labelsLine = m->getline(temp);
738             vector<string> theseLabels = m->splitWhiteSpace(labelsLine);
739             
740             string newLabel = "";
741             for (int j = 0; j < theseLabels.size(); j++) { 
742                  if (j == 1) {  newLabel += "group\t" + theseLabels[j] + '\t';
743                 }else{  newLabel += theseLabels[j] + '\t';      }
744             }
745                         
746                         m->gobble(temp);
747                         
748             int stop = numLines;
749             if (theseLabels.size() != numCols+1) {  stop = numLines*2; }
750                         //for each label
751                         for (int k = 0; k < stop; k++) {
752                                 
753                                 string thisLine = "";
754                                 string tempLabel;
755                                         
756                                 for (int j = 0; j < theseLabels.size(); j++) {  
757                                         temp >> tempLabel; 
758                                                 
759                                         //save for later
760                                         if (j == 1) { thisLine += groupIndex[outputNames[i]] + "\t" + tempLabel + "\t"; }
761                                         else{  thisLine += tempLabel + "\t";    }
762                                 }
763                                         
764                                 thisLine += "\n";
765                                 
766                                 thisFilesLines.push_back(thisLine);
767                                         
768                                 m->gobble(temp);
769                         }
770             
771             string extension = m->getExtension(outputNames[i]);
772             if (theseLabels.size() != numCols+1) { extension = ".ave-std" + extension;  }
773             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
774                         m->mothurRemove(combineFileName); //remove old file
775             filesTypesLabels[extension] = newLabel;
776             filesTypesNumLines[extension] = stop;
777             
778             map<string, map<string, vector<string> > >::iterator itFiles = files.find(extension);
779             if (itFiles != files.end()) { //add new files info to existing type
780                 files[extension][outputNames[i]] = thisFilesLines;
781             }else {
782                 map<string, vector<string> > thisFile;
783                 thisFile[outputNames[i]] = thisFilesLines;
784                 files[extension] = thisFile;
785             }
786                         
787                         temp.close();
788                         m->mothurRemove(outputNames[i]);
789                 }
790                 
791         
792         for (map<string, map<string, vector<string> > >::iterator itFiles = files.begin(); itFiles != files.end(); itFiles++) {
793             
794             if (m->control_pressed) { break; }
795             
796             string extension = itFiles->first;
797             map<string, vector<string> > thisType = itFiles->second;
798             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
799             newComboNames.push_back(combineFileName);
800             //open combined file
801             ofstream out;
802             m->openOutputFile(combineFileName, out);
803             
804             //output label line to new file
805             out <<  filesTypesLabels[extension] << endl;
806                 
807             //for each label
808             for (int k = 0; k < filesTypesNumLines[extension]; k++) {
809                 
810                 //grab summary data for each group
811                 for (map<string, vector<string> >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) {
812                     out << (itType->second)[k];
813                 }
814             }   
815                 
816             outputNames.clear();
817                 
818             out.close();
819         }
820                 
821                 //return combine file name
822                 return newComboNames;
823                 
824         }
825         catch(exception& e) {
826                 m->errorOut(e, "SummaryCommand", "createGroupSummaryFile");
827                 exit(1);
828         }
829 }
830 //**********************************************************************************************************************