]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
added get.rabund and get.sabund command and fixed bug introduced by line by line...
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(){
39         try {
40                 globaldata = GlobalData::getInstance();
41                 outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary");
42                 openOutputFile(outputFileName, outputFileHandle);
43                 format = globaldata->getFormat();
44                 validCalculator = new ValidCalculators();
45                 mult = false;
46                 
47                 int i;
48                 for (i=0; i<globaldata->Estimators.size(); i++) {
49                         if (validCalculator->isValidCalculator("sharedsummary", globaldata->Estimators[i]) == true) { 
50                                 if (globaldata->Estimators[i] == "sharedsobs") { 
51                                         sumCalculators.push_back(new SharedSobsCS());
52                                 }else if (globaldata->Estimators[i] == "sharedchao") { 
53                                         sumCalculators.push_back(new SharedChao1());
54                                 }else if (globaldata->Estimators[i] == "sharedace") { 
55                                         sumCalculators.push_back(new SharedAce());
56                                 }else if (globaldata->Estimators[i] == "jabund") {      
57                                         sumCalculators.push_back(new JAbund());
58                                 }else if (globaldata->Estimators[i] == "sorabund") { 
59                                         sumCalculators.push_back(new SorAbund());
60                                 }else if (globaldata->Estimators[i] == "jclass") { 
61                                         sumCalculators.push_back(new Jclass());
62                                 }else if (globaldata->Estimators[i] == "sorclass") { 
63                                         sumCalculators.push_back(new SorClass());
64                                 }else if (globaldata->Estimators[i] == "jest") { 
65                                         sumCalculators.push_back(new Jest());
66                                 }else if (globaldata->Estimators[i] == "sorest") { 
67                                         sumCalculators.push_back(new SorEst());
68                                 }else if (globaldata->Estimators[i] == "thetayc") { 
69                                         sumCalculators.push_back(new ThetaYC());
70                                 }else if (globaldata->Estimators[i] == "thetan") { 
71                                         sumCalculators.push_back(new ThetaN());
72                                 }else if (globaldata->Estimators[i] == "kstest") { 
73                                         sumCalculators.push_back(new KSTest());
74                                 }else if (globaldata->Estimators[i] == "sharednseqs") { 
75                                         sumCalculators.push_back(new SharedNSeqs());
76                                 }else if (globaldata->Estimators[i] == "ochiai") { 
77                                         sumCalculators.push_back(new Ochiai());
78                                 }else if (globaldata->Estimators[i] == "anderberg") { 
79                                         sumCalculators.push_back(new Anderberg());
80                                 }else if (globaldata->Estimators[i] == "kulczynski") { 
81                                         sumCalculators.push_back(new Kulczynski());
82                                 }else if (globaldata->Estimators[i] == "kulczynskicody") { 
83                                         sumCalculators.push_back(new KulczynskiCody());
84                                 }else if (globaldata->Estimators[i] == "lennon") { 
85                                         sumCalculators.push_back(new Lennon());
86                                 }else if (globaldata->Estimators[i] == "morisitahorn") { 
87                                         sumCalculators.push_back(new MorHorn());
88                                 }else if (globaldata->Estimators[i] == "braycurtis") { 
89                                         sumCalculators.push_back(new BrayCurtis());
90                                 }else if (globaldata->Estimators[i] == "whittaker") { 
91                                         sumCalculators.push_back(new Whittaker());
92                                 }
93                         }
94                 }
95                 //reset calc for next command
96                 globaldata->setCalc("");
97
98         }
99         catch(exception& e) {
100                 cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function SummarySharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
101                 exit(1);
102         }
103         catch(...) {
104                 cout << "An unknown error has occurred in the SummarySharedCommand class function SummarySharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105                 exit(1);
106         }       
107 }
108 //**********************************************************************************************************************
109
110 SummarySharedCommand::~SummarySharedCommand(){
111         delete input;
112         delete read;
113 }
114
115 //**********************************************************************************************************************
116
117 int SummarySharedCommand::execute(){
118         try {
119                 int count = 1;  
120         
121                 //if the users entered no valid calculators don't execute command
122                 if (sumCalculators.size() == 0) { return 0; }
123                 //check if any calcs can do multiples
124                 else{
125                         for (int i = 0; i < sumCalculators.size(); i++) {
126                                 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
127                         }
128                 }
129                 
130                 //read first line
131                 read = new ReadOTUFile(globaldata->inputFileName);      
132                 read->read(&*globaldata); 
133                         
134                 input = globaldata->ginput;
135                 lookup = input->getSharedRAbundVectors();
136                 vector<SharedRAbundVector*> lastLookup = lookup;
137                 
138                 //output estimator names as column headers
139                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
140                 for(int i=0;i<sumCalculators.size();i++){
141                         outputFileHandle << '\t' << sumCalculators[i]->getName();
142                 }
143                 outputFileHandle << endl;
144                 
145                 //create file and put column headers for multiple groups file
146                 if (mult == true) {
147                         outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
148                         openOutputFile(outAllFileName, outAll);
149                         
150                         outAll << "label" <<'\t' << "comparison" << '\t'; 
151                         for(int i=0;i<sumCalculators.size();i++){
152                                 if (sumCalculators[i]->getMultiple() == true) { 
153                                         outAll << '\t' << sumCalculators[i]->getName();
154                                 }
155                         }
156                         outAll << endl;
157                 }
158                 
159                 if (lookup.size() < 2) { 
160                         cout << "I cannot run the command without at least 2 valid groups."; 
161                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
162                         
163                         //close files and clean up
164                         outputFileHandle.close();  remove(outputFileName.c_str());
165                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
166                         return 0;
167                 }
168                                         
169                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
170                 set<string> processedLabels;
171                 set<string> userLabels = globaldata->labels;
172                 set<int> userLines = globaldata->lines;
173                 
174                 //as long as you are not at the end of the file or done wih the lines you want
175                 while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
176                 
177                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
178                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
179                                 process(lookup);
180                                 
181                                 processedLabels.insert(lookup[0]->getLabel());
182                                 userLabels.erase(lookup[0]->getLabel());
183                                 userLines.erase(count);
184                         }
185                         
186                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
187                                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
188                                         process(lastLookup);
189                                         
190                                         processedLabels.insert(lastLookup[0]->getLabel());
191                                         userLabels.erase(lastLookup[0]->getLabel());
192                         }
193
194                 
195                         //prevent memory leak
196                         if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  } }
197                         lastLookup = lookup;                    
198                                 
199                         //get next line to process
200                         lookup = input->getSharedRAbundVectors();
201                         count++;
202                 }
203                 
204                 //output error messages about any remaining user labels
205                 set<string>::iterator it;
206                 bool needToRun = false;
207                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
208                         cout << "Your file does not include the label "<< *it; 
209                         if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
210                                 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
211                                 needToRun = true;
212                         }else {
213                                 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
214                         }
215                 }
216                 
217                 //run last line if you need to
218                 if (needToRun == true)  {
219                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
220                         process(lastLookup);
221                 }
222                 
223                 for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  }
224
225                 //reset groups parameter
226                 globaldata->Groups.clear();  globaldata->setGroups("");
227                 
228                 //close files
229                 outputFileHandle.close();
230                 if (mult == true) {  outAll.close();  }
231
232                 return 0;
233         }
234         catch(exception& e) {
235                 cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
236                 exit(1);
237         }
238         catch(...) {
239                 cout << "An unknown error has occurred in the SummarySharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
240                 exit(1);
241         }               
242 }
243
244 /***********************************************************/
245 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
246         try {
247                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
248                                 if (mult == true) {
249                                         //output label
250                                         outAll << thisLookup[0]->getLabel() << '\t';
251                                         
252                                         //output groups names
253                                         string outNames = "";
254                                         for (int j = 0; j < thisLookup.size(); j++) {
255                                                 outNames += thisLookup[j]->getGroup() +  "-";
256                                         }
257                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
258                                         outAll << outNames << '\t';
259                                         
260                                         for(int i=0;i<sumCalculators.size();i++){
261                                                 if (sumCalculators[i]->getMultiple() == true) { 
262                                                         sumCalculators[i]->getValues(thisLookup);
263                                                         outAll << '\t';
264                                                         sumCalculators[i]->print(outAll);
265                                                 }
266                                         }
267                                         outAll << endl;
268                                 }
269         
270                                 int n = 1; 
271                                 vector<SharedRAbundVector*> subset;
272                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
273                                         for (int l = n; l < thisLookup.size(); l++) {
274                                                 
275                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
276                                                 
277                                                 subset.clear(); //clear out old pair of sharedrabunds
278                                                 //add new pair of sharedrabunds
279                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
280                                                 
281                                                 //sort groups to be alphanumeric
282                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
283                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
284                                                 }else{
285                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
286                                                 }
287                                                 
288                                                 for(int i=0;i<sumCalculators.size();i++) {
289
290                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
291                                                         outputFileHandle << '\t';
292                                                         sumCalculators[i]->print(outputFileHandle);
293                                                 }
294                                                 outputFileHandle << endl;
295                                         }
296                                         n++;
297                                 }
298
299         }
300         catch(exception& e) {
301                 cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
302                 exit(1);
303         }
304         catch(...) {
305                 cout << "An unknown error has occurred in the SummarySharedCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
306                 exit(1);
307         }               
308 }
309
310 /***********************************************************/