git.donarmstrong.com Git - mothur.git/blob - summarycommand.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / summarycommand.cpp
1 /*
2  *  summarycommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarycommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "npshannon.h"
18 #include "shannon.h"
19 #include "jackknife.h"
20 #include "geom.h"
21 #include "logsd.h"
22 #include "qstat.h"
23 #include "bergerparker.h"
24 #include "bstick.h"
25 #include "goodscoverage.h"
26 #include "coverage.h"
27 #include "efron.h"
28 #include "boneh.h"
29 #include "solow.h"
30 #include "shen.h"
31
32 //**********************************************************************************************************************
33
34 SummaryCommand::SummaryCommand(){
35         try {
36                 globaldata = GlobalData::getInstance();
37                 validCalculator = new ValidCalculators();
38                 int i;
39                 
40                 for (i=0; i<globaldata->Estimators.size(); i++) {
41                         if (validCalculator->isValidCalculator("summary", globaldata->Estimators[i]) == true) { 
42                                 if(globaldata->Estimators[i] == "sobs"){
43                                         sumCalculators.push_back(new Sobs());
44                                 }else if(globaldata->Estimators[i] == "chao"){
45                                         sumCalculators.push_back(new Chao1());
46                                 }else if(globaldata->Estimators[i] == "coverage"){
47                                         sumCalculators.push_back(new Coverage());
48                                 }else if(globaldata->Estimators[i] == "geometric"){
49                                         sumCalculators.push_back(new Geom());
50                                 }else if(globaldata->Estimators[i] == "logseries"){
51                                         sumCalculators.push_back(new LogSD());
52                                 }else if(globaldata->Estimators[i] == "qstat"){
53                                         sumCalculators.push_back(new QStat());
54                                 }else if(globaldata->Estimators[i] == "bergerparker"){
55                                         sumCalculators.push_back(new BergerParker());
56                                 }else if(globaldata->Estimators[i] == "bstick"){
57                                         sumCalculators.push_back(new BStick());
58                                 }else if(globaldata->Estimators[i] == "ace"){
59                                         convert(globaldata->getAbund(), abund);
60                                         if(abund < 5)
61                                                 abund = 10;
62                                         sumCalculators.push_back(new Ace(abund));
63                                 }else if(globaldata->Estimators[i] == "jack"){
64                                         sumCalculators.push_back(new Jackknife());
65                                 }else if(globaldata->Estimators[i] == "shannon"){
66                                         sumCalculators.push_back(new Shannon());
67                                 }else if(globaldata->Estimators[i] == "npshannon"){
68                                         sumCalculators.push_back(new NPShannon());
69                                 }else if(globaldata->Estimators[i] == "simpson"){
70                                         sumCalculators.push_back(new Simpson());
71                                 }else if(globaldata->Estimators[i] == "bootstrap"){
72                                         sumCalculators.push_back(new Bootstrap());
73                                 }else if (globaldata->Estimators[i] == "nseqs") { 
74                                         sumCalculators.push_back(new NSeqs());
75                                 }else if (globaldata->Estimators[i] == "goodscoverage") { 
76                                         sumCalculators.push_back(new GoodsCoverage());
77                                 }else if (globaldata->Estimators[i] == "efron") { 
78                                         convert(globaldata->getSize(), size);
79                                         sumCalculators.push_back(new Efron(size));
80                                 }else if (globaldata->Estimators[i] == "boneh") { 
81                                         convert(globaldata->getSize(), size);
82                                         sumCalculators.push_back(new Boneh(size));
83                                 }else if (globaldata->Estimators[i] == "solow") { 
84                                         convert(globaldata->getSize(), size);
85                                         sumCalculators.push_back(new Solow(size));
86                                 }else if (globaldata->Estimators[i] == "shen") { 
87                                         convert(globaldata->getSize(), size);
88                                         sumCalculators.push_back(new Shen(size));
89                                 }
90                         }
91                 }
92                 
93                 //reset calc for next command
94                 globaldata->setCalc("");
95
96         }
97         catch(exception& e) {
98                 cout << "Standard Error: " << e.what() << " has occurred in the SummaryCommand class Function SummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
99                 exit(1);
100         }
101         catch(...) {
102                 cout << "An unknown error has occurred in the SummaryCommand class function SummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
103                 exit(1);
104         }       
105 }
106 //**********************************************************************************************************************
107
108 SummaryCommand::~SummaryCommand(){
109         delete sabund;
110         delete input;
111         delete read;
112 }
113
114 //**********************************************************************************************************************
115
116 int SummaryCommand::execute(){
117         try {
118                 int count = 1;
119                 
120                 //if the users entered no valid calculators don't execute command
121                 if (sumCalculators.size() == 0) { return 0; }
122
123                 outputFileName = ((getRootName(globaldata->inputFileName)) + "summary");
124                 openOutputFile(outputFileName, outputFileHandle);
125                 outputFileHandle << "label";
126         
127                 read = new ReadOTUFile(globaldata->inputFileName);      
128                 read->read(&*globaldata); 
129                 
130                 sabund = globaldata->sabund;
131                 SAbundVector* lastSAbund = sabund;
132                 input = globaldata->ginput;
133                 
134                 for(int i=0;i<sumCalculators.size();i++){
135                         if(sumCalculators[i]->getCols() == 1){
136                                 outputFileHandle << '\t' << sumCalculators[i]->getName();
137                         }
138                         else{
139                                 outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
140                         }
141                 }
142                 outputFileHandle << endl;
143                 
144                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
145                 set<string> processedLabels;
146                 set<string> userLabels = globaldata->labels;
147                 
148                 while((sabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
149                         
150                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(sabund->getLabel()) == 1){                      
151         
152                                 cout << sabund->getLabel() << '\t' << count << endl;
153                                 processedLabels.insert(sabund->getLabel());
154                                 userLabels.erase(sabund->getLabel());
155
156                                 
157                                 outputFileHandle << sabund->getLabel();
158                                 for(int i=0;i<sumCalculators.size();i++){
159                                         vector<double> data = sumCalculators[i]->getValues(sabund);
160                                         outputFileHandle << '\t';
161                                         sumCalculators[i]->print(outputFileHandle);
162                                 }
163                                 outputFileHandle << endl;
164                         }
165                         
166                         if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastSAbund->getLabel()) != 1)) {
167
168                                 cout << lastSAbund->getLabel() << '\t' << count << endl;
169                                 processedLabels.insert(lastSAbund->getLabel());
170                                 userLabels.erase(lastSAbund->getLabel());
171                                 
172                                 outputFileHandle << lastSAbund->getLabel();
173                                 for(int i=0;i<sumCalculators.size();i++){
174                                         vector<double> data = sumCalculators[i]->getValues(lastSAbund);
175                                         outputFileHandle << '\t';
176                                         sumCalculators[i]->print(outputFileHandle);
177                                 }
178                                 outputFileHandle << endl;
179                         }               
180
181                         if (count != 1) { delete lastSAbund; }
182                         lastSAbund = sabund;                    
183
184                         sabund = input->getSAbundVector();
185                         count++;
186                 }
187                 
188                 //output error messages about any remaining user labels
189                 set<string>::iterator it;
190                 bool needToRun = false;
191                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
192                         cout << "Your file does not include the label "<< *it; 
193                         if (processedLabels.count(lastSAbund->getLabel()) != 1) {
194                                 cout << ". I will use " << lastSAbund->getLabel() << "." << endl;
195                                 needToRun = true;
196                         }else {
197                                 cout << ". Please refer to " << lastSAbund->getLabel() << "." << endl;
198                         }
199                 }
200                 
201                 //run last line if you need to
202                 if (needToRun == true)  {
203                         cout << lastSAbund->getLabel() << '\t' << count << endl;
204                         outputFileHandle << lastSAbund->getLabel();
205                         for(int i=0;i<sumCalculators.size();i++){
206                                 vector<double> data = sumCalculators[i]->getValues(lastSAbund);
207                                 outputFileHandle << '\t';
208                                 sumCalculators[i]->print(outputFileHandle);
209                         }
210                         outputFileHandle << endl;
211                 }
212                 
213                 delete lastSAbund;
214                 return 0;
215         }
216         catch(exception& e) {
217                 cout << "Standard Error: " << e.what() << " has occurred in the SummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }
220         catch(...) {
221                 cout << "An unknown error has occurred in the SummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
222                 exit(1);
223         }               
224 }
225
226 //**********************************************************************************************************************