]> git.donarmstrong.com Git - mothur.git/blob - summarycommand.cpp
95f42626c9b324a0622c1ff99806384184d42714
[mothur.git] / summarycommand.cpp
1 /*
2  *  summarycommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarycommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "npshannon.h"
18 #include "shannon.h"
19 #include "jackknife.h"
20 #include "geom.h"
21 #include "logsd.h"
22 #include "qstat.h"
23 #include "bergerparker.h"
24 #include "bstick.h"
25 #include "goodscoverage.h"
26 #include "coverage.h"
27 #include "efron.h"
28 #include "boneh.h"
29 #include "solow.h"
30 #include "shen.h"
31
32 //**********************************************************************************************************************
33
34 SummaryCommand::SummaryCommand(string option)  {
35         try {
36                 globaldata = GlobalData::getInstance();
37                 abort = false;
38                 allLines = 1;
39                 labels.clear();
40                 Estimators.clear();
41                 
42                 //allow user to run help
43                 if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
44                 
45                 else {
46                         //valid paramters for this command
47                         string Array[] =  {"label","calc","abund","size","outputdir","groupmode","inputdir"};
48                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
49                         
50                         OptionParser parser(option);
51                         map<string,string> parameters = parser.getParameters();
52                         
53                         ValidParameters validParameter;
54                         
55                         //check to make sure all parameters are valid for command
56                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
57                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
58                         }
59                         
60                         //make sure the user has already run the read.otu command
61                         if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the summary.single command."); m->mothurOutEndLine(); abort = true; }
62                         
63                         //if the user changes the output directory command factory will send this info to us in the output parameter 
64                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
65                                 outputDir = ""; 
66                                 outputDir += hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it  
67                         }
68
69                         //check for optional parameter and set defaults
70                         // ...at some point should added some additional type checking...
71                         label = validParameter.validFile(parameters, "label", false);                   
72                         if (label == "not found") { label = ""; }
73                         else { 
74                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
75                                 else { allLines = 1;  }
76                         }
77                         
78                         //if the user has not specified any labels use the ones from read.otu
79                         if(label == "") {  
80                                 allLines = globaldata->allLines; 
81                                 labels = globaldata->labels; 
82                         }
83                                 
84                         calc = validParameter.validFile(parameters, "calc", false);                     
85                         if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
86                         else { 
87                                  if (calc == "default")  {  calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
88                         }
89                         splitAtDash(calc, Estimators);
90
91                         string temp;
92                         temp = validParameter.validFile(parameters, "abund", false);            if (temp == "not found") { temp = "10"; }
93                         convert(temp, abund); 
94                         
95                         temp = validParameter.validFile(parameters, "size", false);                     if (temp == "not found") { temp = "0"; }
96                         convert(temp, size); 
97                         
98                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "F"; }
99                         groupMode = isTrue(temp);
100                         
101         
102                 }
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
106                 exit(1);
107         }
108 }
109 //**********************************************************************************************************************
110
111 void SummaryCommand::help(){
112         try {
113                 m->mothurOut("The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n");
114                 m->mothurOut("The summary.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster.\n");
115                 m->mothurOut("The summary.single command parameters are label, calc, abund and groupmode.  No parameters are required.\n");
116                 m->mothurOut("The summary.single command should be in the following format: \n");
117                 m->mothurOut("summary.single(label=yourLabel, calc=yourEstimators).\n");
118                 m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n");
119                 validCalculator->printCalc("summary", cout);
120                 m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n");
121                 m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=False).\n");
122                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
123                 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n");
124         }
125         catch(exception& e) {
126                 m->errorOut(e, "SummaryCommand", "help");
127                 exit(1);
128         }
129 }
130
131 //**********************************************************************************************************************
132
133 SummaryCommand::~SummaryCommand(){}
134
135 //**********************************************************************************************************************
136
137 int SummaryCommand::execute(){
138         try {
139         
140                 if (abort == true) { return 0; }
141                 
142                 vector<string> outputNames;
143                 
144                 string hadShared = "";
145                 if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName);  }
146                 else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile());  globaldata->setFormat("rabund");  }
147                 
148                 if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } return 0; }
149                 
150                 int numLines = 0;
151                 int numCols = 0;
152                 
153                 for (int p = 0; p < inputFileNames.size(); p++) {
154                         
155                         numLines = 0;
156                         numCols = 0;
157                         
158                         string fileNameRoot = outputDir + getRootName(getSimpleName(inputFileNames[p])) + "summary";
159                         globaldata->inputFileName = inputFileNames[p];
160                         outputNames.push_back(fileNameRoot);
161                         
162                         if (inputFileNames.size() > 1) {
163                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
164                         }
165                         
166                         sumCalculators.clear();
167                         
168                         validCalculator = new ValidCalculators();
169                         
170                         for (int i=0; i<Estimators.size(); i++) {
171                                 if (validCalculator->isValidCalculator("summary", Estimators[i]) == true) { 
172                                         if(Estimators[i] == "sobs"){
173                                                 sumCalculators.push_back(new Sobs());
174                                         }else if(Estimators[i] == "chao"){
175                                                 sumCalculators.push_back(new Chao1());
176                                         }else if(Estimators[i] == "coverage"){
177                                                 sumCalculators.push_back(new Coverage());
178                                         }else if(Estimators[i] == "geometric"){
179                                                 sumCalculators.push_back(new Geom());
180                                         }else if(Estimators[i] == "logseries"){
181                                                 sumCalculators.push_back(new LogSD());
182                                         }else if(Estimators[i] == "qstat"){
183                                                 sumCalculators.push_back(new QStat());
184                                         }else if(Estimators[i] == "bergerparker"){
185                                                 sumCalculators.push_back(new BergerParker());
186                                         }else if(Estimators[i] == "bstick"){
187                                                 sumCalculators.push_back(new BStick());
188                                         }else if(Estimators[i] == "ace"){
189                                                 if(abund < 5)
190                                                         abund = 10;
191                                                 sumCalculators.push_back(new Ace(abund));
192                                         }else if(Estimators[i] == "jack"){
193                                                 sumCalculators.push_back(new Jackknife());
194                                         }else if(Estimators[i] == "shannon"){
195                                                 sumCalculators.push_back(new Shannon());
196                                         }else if(Estimators[i] == "npshannon"){
197                                                 sumCalculators.push_back(new NPShannon());
198                                         }else if(Estimators[i] == "simpson"){
199                                                 sumCalculators.push_back(new Simpson());
200                                         }else if(Estimators[i] == "bootstrap"){
201                                                 sumCalculators.push_back(new Bootstrap());
202                                         }else if (Estimators[i] == "nseqs") { 
203                                                 sumCalculators.push_back(new NSeqs());
204                                         }else if (Estimators[i] == "goodscoverage") { 
205                                                 sumCalculators.push_back(new GoodsCoverage());
206                                         }else if (Estimators[i] == "efron") { 
207                                                 sumCalculators.push_back(new Efron(size));
208                                         }else if (Estimators[i] == "boneh") { 
209                                                 sumCalculators.push_back(new Boneh(size));
210                                         }else if (Estimators[i] == "solow") { 
211                                                 sumCalculators.push_back(new Solow(size));
212                                         }else if (Estimators[i] == "shen") { 
213                                                 sumCalculators.push_back(new Shen(size, abund));
214                                         }
215                                 }
216                         }
217                         
218                         //if the users entered no valid calculators don't execute command
219                         if (sumCalculators.size() == 0) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } return 0; }
220                         
221                         ofstream outputFileHandle;
222                         openOutputFile(fileNameRoot, outputFileHandle);
223                         outputFileHandle << "label";
224                         
225                         read = new ReadOTUFile(globaldata->inputFileName);      
226                         read->read(&*globaldata); 
227                         
228                         sabund = globaldata->sabund;
229                         string lastLabel = sabund->getLabel();
230                         input = globaldata->ginput;
231                         
232                         for(int i=0;i<sumCalculators.size();i++){
233                                 if(sumCalculators[i]->getCols() == 1){
234                                         outputFileHandle << '\t' << sumCalculators[i]->getName();
235                                         numCols++;
236                                 }
237                                 else{
238                                         outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
239                                         numCols += 3;
240                                 }
241                         }
242                         outputFileHandle << endl;
243                         
244                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
245                         set<string> processedLabels;
246                         set<string> userLabels = labels;
247                         
248                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  }  outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0;  }
249                         
250                         while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
251                                 
252                                 if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0;  }
253                                 
254                                 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){                     
255                                         
256                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
257                                         processedLabels.insert(sabund->getLabel());
258                                         userLabels.erase(sabund->getLabel());
259                                         
260                                         outputFileHandle << sabund->getLabel();
261                                         for(int i=0;i<sumCalculators.size();i++){
262                                                 vector<double> data = sumCalculators[i]->getValues(sabund);
263                                                 
264                                                 if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0;  }
265
266                                                 outputFileHandle << '\t';
267                                                 sumCalculators[i]->print(outputFileHandle);
268                                         }
269                                         outputFileHandle << endl;
270                                         numLines++;
271                                 }
272                                 
273                                 if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
274                                         string saveLabel = sabund->getLabel();
275                                         
276                                         delete sabund;
277                                         sabund = input->getSAbundVector(lastLabel);
278                                         
279                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
280                                         processedLabels.insert(sabund->getLabel());
281                                         userLabels.erase(sabund->getLabel());
282                                         
283                                         outputFileHandle << sabund->getLabel();
284                                         for(int i=0;i<sumCalculators.size();i++){
285                                                 vector<double> data = sumCalculators[i]->getValues(sabund);
286                                                 
287                                                 if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0;  }
288                                                 
289                                                 outputFileHandle << '\t';
290                                                 sumCalculators[i]->print(outputFileHandle);
291                                         }
292                                         outputFileHandle << endl;
293                                         numLines++;
294                                         
295                                         //restore real lastlabel to save below
296                                         sabund->setLabel(saveLabel);
297                                 }               
298                                 
299                                 lastLabel = sabund->getLabel();                 
300                                 
301                                 delete sabund;
302                                 sabund = input->getSAbundVector();
303                         }
304                         
305                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read;  delete input; globaldata->ginput = NULL; return 0;  }
306
307                         //output error messages about any remaining user labels
308                         set<string>::iterator it;
309                         bool needToRun = false;
310                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
311                                 m->mothurOut("Your file does not include the label " + *it); 
312                                 if (processedLabels.count(lastLabel) != 1) {
313                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
314                                         needToRun = true;
315                                 }else {
316                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
317                                 }
318                         }
319                         
320                         //run last label if you need to
321                         if (needToRun == true)  {
322                                 if (sabund != NULL) {   delete sabund;  }
323                                 sabund = input->getSAbundVector(lastLabel);
324                                 
325                                 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
326                                 outputFileHandle << sabund->getLabel();
327                                 for(int i=0;i<sumCalculators.size();i++){
328                                         vector<double> data = sumCalculators[i]->getValues(sabund);
329                                         
330                                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read; delete sabund; globaldata->sabund = NULL; delete input; globaldata->ginput = NULL; return 0;  }
331
332                                         outputFileHandle << '\t';
333                                         sumCalculators[i]->print(outputFileHandle);
334                                 }
335                                 outputFileHandle << endl;
336                                 numLines++;
337                                 delete sabund;
338                         }
339                         
340                         outputFileHandle.close();
341                         
342                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } delete validCalculator; delete read;  delete input; globaldata->ginput = NULL; return 0;  }
343
344                         
345                         delete input;  globaldata->ginput = NULL;
346                         delete read;
347                         delete validCalculator;
348                         globaldata->sabund = NULL;
349                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
350                 }
351                 
352                 if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  }
353                 
354                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }  return 0;  }
355                 
356                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
357                 if ((hadShared != "") && (groupMode)) {   outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames));  }
358                 
359                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }  return 0;  }
360                 
361                 m->mothurOutEndLine();
362                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
363                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
364                 m->mothurOutEndLine();
365                 
366                 return 0;
367         }
368         catch(exception& e) {
369                 m->errorOut(e, "SummaryCommand", "execute");
370                 exit(1);
371         }
372 }
373 //**********************************************************************************************************************
374 vector<string> SummaryCommand::parseSharedFile(string filename) {
375         try {
376                 vector<string> filenames;
377                 
378                 map<string, ofstream*> filehandles;
379                 map<string, ofstream*>::iterator it3;
380                 
381                                 
382                 //read first line
383                 read = new ReadOTUFile(filename);       
384                 read->read(&*globaldata); 
385                         
386                 input = globaldata->ginput;
387                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
388                 
389                 string sharedFileRoot = getRootName(filename);
390                 
391                 //clears file before we start to write to it below
392                 for (int i=0; i<lookup.size(); i++) {
393                         remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
394                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
395                 }
396                 
397                 ofstream* temp;
398                 for (int i=0; i<lookup.size(); i++) {
399                         temp = new ofstream;
400                         filehandles[lookup[i]->getGroup()] = temp;
401                         groups.push_back(lookup[i]->getGroup());
402                 }
403
404                 while(lookup[0] != NULL) {
405                 
406                         for (int i = 0; i < lookup.size(); i++) {
407                                 RAbundVector rav = lookup[i]->getRAbundVector();
408                                 openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
409                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
410                                 (*(filehandles[lookup[i]->getGroup()])).close();
411                         }
412                 
413                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
414                         lookup = input->getSharedRAbundVectors();
415                 }
416                 
417                 //free memory
418                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
419                         delete it3->second;
420                 }
421                 delete read;
422                 delete input;
423                 globaldata->ginput = NULL;
424
425                 return filenames;
426         }
427         catch(exception& e) {
428                 m->errorOut(e, "SummaryCommand", "parseSharedFile");
429                 exit(1);
430         }
431 }
432 //**********************************************************************************************************************
433 string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string> outputNames) {
434         try {
435                 
436                 ofstream out;
437                 string combineFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "groups.summary";
438                 
439                 //open combined file
440                 openOutputFile(combineFileName, out);
441                 
442                 //open each groups summary file
443                 string newLabel = "";
444                 ifstream* temp;
445                 map<string, ifstream*> filehandles;
446                 for (int i=0; i<outputNames.size(); i++) {
447                         temp = new ifstream;
448                         filehandles[outputNames[i]] = temp;
449                         openInputFile(outputNames[i], *(temp));
450                         
451                         //read through first line - labels
452                         string tempLabel;
453                         if (i == 0) { //we want to save the labels to output below
454                                 for (int j = 0; j < numCols+1; j++) {  
455                                         *(temp) >> tempLabel; 
456                                         
457                                         if (j == 1) {  newLabel += "group\t" + tempLabel + '\t';
458                                         }else{  newLabel += tempLabel + '\t';   }
459                                 }
460                         }else{  for (int j = 0; j < numCols+1; j++) {  *(temp) >> tempLabel;  }  }
461                         
462                         gobble(*(temp));
463                 }
464                 
465                 //output label line to new file
466                 out << newLabel << endl;
467                 
468                 //for each label
469                 for (int i = 0; i < numLines; i++) {
470                 
471                         //grab summary data for each group
472                         for (int i=0; i<outputNames.size(); i++) {
473                                 string tempLabel;
474                                 
475                                 for (int j = 0; j < numCols+1; j++) {  
476                                         *(filehandles[outputNames[i]]) >> tempLabel; 
477                                         
478                                         //print to combined file
479                                         if (j == 1) { out << groups[i] << '\t' << tempLabel << '\t';    }
480                                         else{  out << tempLabel << '\t';        }
481                                 }
482                                 
483                                 out << endl;
484                                 gobble(*(filehandles[outputNames[i]]));
485                         }
486                 }       
487                 
488                 //close each groups summary file
489                 for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  }
490                 out.close();
491                 
492                 //return combine file name
493                 return combineFileName;
494                 
495         }
496         catch(exception& e) {
497                 m->errorOut(e, "SummaryCommand", "createGroupSummaryFile");
498                 exit(1);
499         }
500 }
501 //**********************************************************************************************************************