]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(){
39         try {
40                 globaldata = GlobalData::getInstance();
41                 outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary");
42                 openOutputFile(outputFileName, outputFileHandle);
43                 format = globaldata->getFormat();
44                 validCalculator = new ValidCalculators();
45                 mult = false;
46                 
47                 int i;
48                 for (i=0; i<globaldata->Estimators.size(); i++) {
49                         if (validCalculator->isValidCalculator("sharedsummary", globaldata->Estimators[i]) == true) { 
50                                 if (globaldata->Estimators[i] == "sharedsobs") { 
51                                         sumCalculators.push_back(new SharedSobsCS());
52                                 }else if (globaldata->Estimators[i] == "sharedchao") { 
53                                         sumCalculators.push_back(new SharedChao1());
54                                 }else if (globaldata->Estimators[i] == "sharedace") { 
55                                         sumCalculators.push_back(new SharedAce());
56                                 }else if (globaldata->Estimators[i] == "jabund") {      
57                                         sumCalculators.push_back(new JAbund());
58                                 }else if (globaldata->Estimators[i] == "sorabund") { 
59                                         sumCalculators.push_back(new SorAbund());
60                                 }else if (globaldata->Estimators[i] == "jclass") { 
61                                         sumCalculators.push_back(new Jclass());
62                                 }else if (globaldata->Estimators[i] == "sorclass") { 
63                                         sumCalculators.push_back(new SorClass());
64                                 }else if (globaldata->Estimators[i] == "jest") { 
65                                         sumCalculators.push_back(new Jest());
66                                 }else if (globaldata->Estimators[i] == "sorest") { 
67                                         sumCalculators.push_back(new SorEst());
68                                 }else if (globaldata->Estimators[i] == "thetayc") { 
69                                         sumCalculators.push_back(new ThetaYC());
70                                 }else if (globaldata->Estimators[i] == "thetan") { 
71                                         sumCalculators.push_back(new ThetaN());
72                                 }else if (globaldata->Estimators[i] == "kstest") { 
73                                         sumCalculators.push_back(new KSTest());
74                                 }else if (globaldata->Estimators[i] == "sharednseqs") { 
75                                         sumCalculators.push_back(new SharedNSeqs());
76                                 }else if (globaldata->Estimators[i] == "ochiai") { 
77                                         sumCalculators.push_back(new Ochiai());
78                                 }else if (globaldata->Estimators[i] == "anderberg") { 
79                                         sumCalculators.push_back(new Anderberg());
80                                 }else if (globaldata->Estimators[i] == "kulczynski") { 
81                                         sumCalculators.push_back(new Kulczynski());
82                                 }else if (globaldata->Estimators[i] == "kulczynskicody") { 
83                                         sumCalculators.push_back(new KulczynskiCody());
84                                 }else if (globaldata->Estimators[i] == "lennon") { 
85                                         sumCalculators.push_back(new Lennon());
86                                 }else if (globaldata->Estimators[i] == "morisitahorn") { 
87                                         sumCalculators.push_back(new MorHorn());
88                                 }else if (globaldata->Estimators[i] == "braycurtis") { 
89                                         sumCalculators.push_back(new BrayCurtis());
90                                 }else if (globaldata->Estimators[i] == "whittaker") { 
91                                         sumCalculators.push_back(new Whittaker());
92                                 }
93                         }
94                 }
95                 //reset calc for next command
96                 globaldata->setCalc("");
97
98         }
99         catch(exception& e) {
100                 cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function SummarySharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
101                 exit(1);
102         }
103         catch(...) {
104                 cout << "An unknown error has occurred in the SummarySharedCommand class function SummarySharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105                 exit(1);
106         }       
107 }
108 //**********************************************************************************************************************
109
110 SummarySharedCommand::~SummarySharedCommand(){
111         delete input;
112         delete read;
113 }
114
115 //**********************************************************************************************************************
116
117 int SummarySharedCommand::execute(){
118         try {
119                 int count = 1;  
120         
121                 //if the users entered no valid calculators don't execute command
122                 if (sumCalculators.size() == 0) { return 0; }
123                 //check if any calcs can do multiples
124                 else{
125                         for (int i = 0; i < sumCalculators.size(); i++) {
126                                 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
127                         }
128                 }
129                 
130                 //read first line
131                 read = new ReadOTUFile(globaldata->inputFileName);      
132                 read->read(&*globaldata); 
133                         
134                 input = globaldata->ginput;
135                 lookup = input->getSharedRAbundVectors();
136                 vector<SharedRAbundVector*> lastLookup = lookup;
137                 
138                 //output estimator names as column headers
139                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
140                 for(int i=0;i<sumCalculators.size();i++){
141                         outputFileHandle << '\t' << sumCalculators[i]->getName();
142                 }
143                 outputFileHandle << endl;
144                 
145                 //create file and put column headers for multiple groups file
146                 if (mult == true) {
147                         outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
148                         openOutputFile(outAllFileName, outAll);
149                         
150                         outAll << "label" <<'\t' << "comparison" << '\t'; 
151                         for(int i=0;i<sumCalculators.size();i++){
152                                 if (sumCalculators[i]->getMultiple() == true) { 
153                                         outAll << '\t' << sumCalculators[i]->getName();
154                                 }
155                         }
156                         outAll << endl;
157                 }
158                 
159                 if (lookup.size() < 2) { 
160                         cout << "I cannot run the command without at least 2 valid groups."; 
161                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
162                         
163                         //close files and clean up
164                         outputFileHandle.close();  remove(outputFileName.c_str());
165                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
166                         return 0;
167                 }
168                                         
169                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
170                 set<string> processedLabels;
171                 set<string> userLabels = globaldata->labels;
172                 
173                 //as long as you are not at the end of the file or done wih the lines you want
174                 while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
175                 
176                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
177                                 cout << lookup[0]->getLabel() << '\t' << count << endl;
178                                 process(lookup);
179                                 
180                                 processedLabels.insert(lookup[0]->getLabel());
181                                 userLabels.erase(lookup[0]->getLabel());
182                         }
183                         
184                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
185                                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
186                                         process(lastLookup);
187                                         
188                                         processedLabels.insert(lastLookup[0]->getLabel());
189                                         userLabels.erase(lastLookup[0]->getLabel());
190                         }
191
192                 
193                         //prevent memory leak
194                         if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  } }
195                         lastLookup = lookup;                    
196                                 
197                         //get next line to process
198                         lookup = input->getSharedRAbundVectors();
199                         count++;
200                 }
201                 
202                 //output error messages about any remaining user labels
203                 set<string>::iterator it;
204                 bool needToRun = false;
205                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
206                         cout << "Your file does not include the label "<< *it; 
207                         if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
208                                 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
209                                 needToRun = true;
210                         }else {
211                                 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
212                         }
213                 }
214                 
215                 //run last line if you need to
216                 if (needToRun == true)  {
217                         cout << lastLookup[0]->getLabel() << '\t' << count << endl;
218                         process(lastLookup);
219                 }
220                 
221                 for (int i = 0; i < lastLookup.size(); i++) {  delete lastLookup[i];  }
222
223                 //reset groups parameter
224                 globaldata->Groups.clear();  globaldata->setGroups("");
225                 
226                 //close files
227                 outputFileHandle.close();
228                 if (mult == true) {  outAll.close();  }
229
230                 return 0;
231         }
232         catch(exception& e) {
233                 cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
234                 exit(1);
235         }
236         catch(...) {
237                 cout << "An unknown error has occurred in the SummarySharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
238                 exit(1);
239         }               
240 }
241
242 /***********************************************************/
243 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
244         try {
245                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
246                                 if (mult == true) {
247                                         //output label
248                                         outAll << thisLookup[0]->getLabel() << '\t';
249                                         
250                                         //output groups names
251                                         string outNames = "";
252                                         for (int j = 0; j < thisLookup.size(); j++) {
253                                                 outNames += thisLookup[j]->getGroup() +  "-";
254                                         }
255                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
256                                         outAll << outNames << '\t';
257                                         
258                                         for(int i=0;i<sumCalculators.size();i++){
259                                                 if (sumCalculators[i]->getMultiple() == true) { 
260                                                         sumCalculators[i]->getValues(thisLookup);
261                                                         outAll << '\t';
262                                                         sumCalculators[i]->print(outAll);
263                                                 }
264                                         }
265                                         outAll << endl;
266                                 }
267         
268                                 int n = 1; 
269                                 vector<SharedRAbundVector*> subset;
270                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
271                                         for (int l = n; l < thisLookup.size(); l++) {
272                                                 
273                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
274                                                 
275                                                 subset.clear(); //clear out old pair of sharedrabunds
276                                                 //add new pair of sharedrabunds
277                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
278                                                 
279                                                 //sort groups to be alphanumeric
280                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
281                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
282                                                 }else{
283                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
284                                                 }
285                                                 
286                                                 for(int i=0;i<sumCalculators.size();i++) {
287
288                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
289                                                         outputFileHandle << '\t';
290                                                         sumCalculators[i]->print(outputFileHandle);
291                                                 }
292                                                 outputFileHandle << endl;
293                                         }
294                                         n++;
295                                 }
296
297         }
298         catch(exception& e) {
299                 cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
300                 exit(1);
301         }
302         catch(...) {
303                 cout << "An unknown error has occurred in the SummarySharedCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
304                 exit(1);
305         }               
306 }
307
308 /***********************************************************/