]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
fixed order of unifrac commands so that it matches the summary commands
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(string option)  {
39         try {
40                 globaldata = GlobalData::getInstance();
41                 abort = false;
42                 allLines = 1;
43                 labels.clear();
44                 Estimators.clear();
45                 
46                 //allow user to run help
47                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
48                 
49                 else {
50                         //valid paramters for this command
51                         string Array[] =  {"label","calc","groups","all","outputdir","inputdir", "processors"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                 
59                         //check to make sure all parameters are valid for command
60                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
61                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
62                         }
63                         
64                         //make sure the user has already run the read.otu command
65                         if (globaldata->getSharedFile() == "") {
66                                  m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true; 
67                         }
68                         
69                         //if the user changes the output directory command factory will send this info to us in the output parameter 
70                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
71                                 outputDir = ""; 
72                                 outputDir += m->hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it     
73                         }
74
75                         //check for optional parameter and set defaults
76                         // ...at some point should added some additional type checking...
77                         label = validParameter.validFile(parameters, "label", false);                   
78                         if (label == "not found") { label = ""; }
79                         else { 
80                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
81                                 else { allLines = 1;  }
82                         }
83                         
84                         //if the user has not specified any labels use the ones from read.otu
85                         if(label == "") {  
86                                 allLines = globaldata->allLines; 
87                                 labels = globaldata->labels; 
88                         }
89                                 
90                         calc = validParameter.validFile(parameters, "calc", false);                     
91                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
92                         else { 
93                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
94                         }
95                         m->splitAtDash(calc, Estimators);
96                         
97                         groups = validParameter.validFile(parameters, "groups", false);                 
98                         if (groups == "not found") { groups = ""; }
99                         else { 
100                                 m->splitAtDash(groups, Groups);
101                                 globaldata->Groups = Groups;
102                         }
103                         
104                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
105                         all = m->isTrue(temp);
106                         
107                         temp = validParameter.validFile(parameters, "processors", false);       if(temp == "not found"){        temp = "1"; }
108                         convert(temp, processors); 
109                         
110                         if (abort == false) {
111                         
112                                 validCalculator = new ValidCalculators();
113                                 int i;
114                                 
115                                 for (i=0; i<Estimators.size(); i++) {
116                                         if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
117                                                 if (Estimators[i] == "sharedsobs") { 
118                                                         sumCalculators.push_back(new SharedSobsCS());
119                                                 }else if (Estimators[i] == "sharedchao") { 
120                                                         sumCalculators.push_back(new SharedChao1());
121                                                 }else if (Estimators[i] == "sharedace") { 
122                                                         sumCalculators.push_back(new SharedAce());
123                                                 }else if (Estimators[i] == "jabund") {  
124                                                         sumCalculators.push_back(new JAbund());
125                                                 }else if (Estimators[i] == "sorabund") { 
126                                                         sumCalculators.push_back(new SorAbund());
127                                                 }else if (Estimators[i] == "jclass") { 
128                                                         sumCalculators.push_back(new Jclass());
129                                                 }else if (Estimators[i] == "sorclass") { 
130                                                         sumCalculators.push_back(new SorClass());
131                                                 }else if (Estimators[i] == "jest") { 
132                                                         sumCalculators.push_back(new Jest());
133                                                 }else if (Estimators[i] == "sorest") { 
134                                                         sumCalculators.push_back(new SorEst());
135                                                 }else if (Estimators[i] == "thetayc") { 
136                                                         sumCalculators.push_back(new ThetaYC());
137                                                 }else if (Estimators[i] == "thetan") { 
138                                                         sumCalculators.push_back(new ThetaN());
139                                                 }else if (Estimators[i] == "kstest") { 
140                                                         sumCalculators.push_back(new KSTest());
141                                                 }else if (Estimators[i] == "sharednseqs") { 
142                                                         sumCalculators.push_back(new SharedNSeqs());
143                                                 }else if (Estimators[i] == "ochiai") { 
144                                                         sumCalculators.push_back(new Ochiai());
145                                                 }else if (Estimators[i] == "anderberg") { 
146                                                         sumCalculators.push_back(new Anderberg());
147                                                 }else if (Estimators[i] == "kulczynski") { 
148                                                         sumCalculators.push_back(new Kulczynski());
149                                                 }else if (Estimators[i] == "kulczynskicody") { 
150                                                         sumCalculators.push_back(new KulczynskiCody());
151                                                 }else if (Estimators[i] == "lennon") { 
152                                                         sumCalculators.push_back(new Lennon());
153                                                 }else if (Estimators[i] == "morisitahorn") { 
154                                                         sumCalculators.push_back(new MorHorn());
155                                                 }else if (Estimators[i] == "braycurtis") { 
156                                                         sumCalculators.push_back(new BrayCurtis());
157                                                 }else if (Estimators[i] == "whittaker") { 
158                                                         sumCalculators.push_back(new Whittaker());
159                                                 }
160                                         }
161                                 }
162                                 
163                                 mult = false;
164                         }
165                 }
166         }
167         catch(exception& e) {
168                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
169                 exit(1);
170         }
171 }
172
173 //**********************************************************************************************************************
174
175 void SummarySharedCommand::help(){
176         try {
177                 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
178                 m->mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
179                 m->mothurOut("The summary.shared command should be in the following format: \n");
180                 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
181                 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
182                 validCalculator->printCalc("sharedsummary", cout);
183                 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
184                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
185                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
186                 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
187                 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
188                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
189                 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "SummarySharedCommand", "help");
193                 exit(1);
194         }
195 }
196
197 //**********************************************************************************************************************
198
199 SummarySharedCommand::~SummarySharedCommand(){
200         if (abort == false) {
201                 delete read;
202                 delete validCalculator;
203         }
204 }
205
206 //**********************************************************************************************************************
207
208 int SummarySharedCommand::execute(){
209         try {
210         
211                 if (abort == true) { return 0; }
212                 
213                 ofstream outputFileHandle, outAll;
214                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
215                 
216                 //if the users entered no valid calculators don't execute command
217                 if (sumCalculators.size() == 0) { return 0; }
218                 //check if any calcs can do multiples
219                 else{
220                         if (all){ 
221                                 for (int i = 0; i < sumCalculators.size(); i++) {
222                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
223                                 }
224                         }
225                 }
226                 
227                 //read first line
228                 read = new ReadOTUFile(globaldata->inputFileName);      
229                 read->read(&*globaldata); 
230                         
231                 input = globaldata->ginput;
232                 lookup = input->getSharedRAbundVectors();
233                 string lastLabel = lookup[0]->getLabel();
234         
235                 /******************************************************/
236                 //output headings for files
237                 /******************************************************/
238                 //output estimator names as column headers
239                 m->openOutputFile(outputFileName, outputFileHandle);
240                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
241                 for(int i=0;i<sumCalculators.size();i++){
242                         outputFileHandle << '\t' << sumCalculators[i]->getName();
243                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
244                 }
245                 outputFileHandle << endl;
246                 outputFileHandle.close();
247                 
248                 //create file and put column headers for multiple groups file
249                 string outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
250                 if (mult == true) {
251                         m->openOutputFile(outAllFileName, outAll);
252                         outputNames.push_back(outAllFileName);
253                         
254                         outAll << "label" <<'\t' << "comparison" << '\t'; 
255                         for(int i=0;i<sumCalculators.size();i++){
256                                 if (sumCalculators[i]->getMultiple() == true) { 
257                                         outAll << '\t' << sumCalculators[i]->getName();
258                                 }
259                         }
260                         outAll << endl;
261                         outAll.close();
262                 }
263                 
264                 if (lookup.size() < 2) { 
265                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
266                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
267                         
268                         //close files and clean up
269                         remove(outputFileName.c_str());
270                         if (mult == true) { remove(outAllFileName.c_str());  }
271                         return 0;
272                 //if you only have 2 groups you don't need a .sharedmultiple file
273                 }else if ((lookup.size() == 2) && (mult == true)) { 
274                         mult = false;
275                         remove(outAllFileName.c_str());
276                         outputNames.pop_back();
277                 }
278                 
279                 if (m->control_pressed) {
280                         if (mult) {  remove(outAllFileName.c_str());  }
281                         remove(outputFileName.c_str()); 
282                         delete input; globaldata->ginput = NULL;
283                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
284                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
285                         globaldata->Groups.clear(); 
286                         return 0;
287                 }
288                 /******************************************************/
289                 
290                 
291                 /******************************************************/
292                 //comparison breakup to be used by different processes later
293                 numGroups = globaldata->Groups.size();
294                 lines.resize(processors);
295                 for (int i = 0; i < processors; i++) {
296                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
297                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
298                 }               
299                 /******************************************************/
300                 
301                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
302                 set<string> processedLabels;
303                 set<string> userLabels = labels;
304                         
305                 //as long as you are not at the end of the file or done wih the lines you want
306                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
307                         if (m->control_pressed) {
308                                 if (mult) {  remove(outAllFileName.c_str());  }
309                                 remove(outputFileName.c_str()); 
310                                 delete input; globaldata->ginput = NULL;
311                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
312                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
313                                 globaldata->Groups.clear(); 
314                                 return 0;
315                         }
316
317                 
318                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
319                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
320                                 process(lookup, outputFileName, outAllFileName);
321                                 
322                                 processedLabels.insert(lookup[0]->getLabel());
323                                 userLabels.erase(lookup[0]->getLabel());
324                         }
325                         
326                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
327                                         string saveLabel = lookup[0]->getLabel();
328                                         
329                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
330                                         lookup = input->getSharedRAbundVectors(lastLabel);
331
332                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
333                                         process(lookup, outputFileName, outAllFileName);
334                                         
335                                         processedLabels.insert(lookup[0]->getLabel());
336                                         userLabels.erase(lookup[0]->getLabel());
337                                         
338                                         //restore real lastlabel to save below
339                                         lookup[0]->setLabel(saveLabel);
340                         }
341                         
342                         lastLabel = lookup[0]->getLabel();                      
343                                 
344                         //get next line to process
345                         //prevent memory leak
346                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
347                         lookup = input->getSharedRAbundVectors();
348                 }
349                 
350                 if (m->control_pressed) {
351                         if (mult) { remove(outAllFileName.c_str());  }
352                         remove(outputFileName.c_str()); 
353                         delete input; globaldata->ginput = NULL;
354                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
355                         globaldata->Groups.clear(); 
356                         return 0;
357                 }
358
359                 //output error messages about any remaining user labels
360                 set<string>::iterator it;
361                 bool needToRun = false;
362                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
363                         m->mothurOut("Your file does not include the label " + *it); 
364                         if (processedLabels.count(lastLabel) != 1) {
365                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
366                                 needToRun = true;
367                         }else {
368                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
369                         }
370                 }
371                 
372                 //run last label if you need to
373                 if (needToRun == true)  {
374                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
375                                 lookup = input->getSharedRAbundVectors(lastLabel);
376
377                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
378                                 process(lookup, outputFileName, outAllFileName);
379                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
380                 }
381                 
382                                 
383                 //reset groups parameter
384                 globaldata->Groups.clear();  
385                 
386                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
387                 delete input;  globaldata->ginput = NULL;
388                 
389                 if (m->control_pressed) {
390                         remove(outAllFileName.c_str());  
391                         remove(outputFileName.c_str()); 
392                         return 0;
393                 }
394                 
395                 m->mothurOutEndLine();
396                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
397                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
398                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        }
399                 m->mothurOutEndLine();
400
401                 return 0;
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "SummarySharedCommand", "execute");
405                 exit(1);
406         }
407 }
408
409 /***********************************************************/
410 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
411         try {
412                                 
413                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
414                                 if(processors == 1){
415                                         driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp");
416                                         m->appendFiles((sumFileName + ".temp"), sumFileName);
417                                         remove((sumFileName + ".temp").c_str());
418                                         if (mult) {
419                                                 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
420                                                 remove((sumAllFileName + ".temp").c_str());
421                                         }
422                                 }else{
423                                         int process = 0;
424                                         vector<int> processIDS;
425                 
426                                         //loop through and create all the processes you want
427                                         while (process != processors) {
428                                                 int pid = fork();
429                                                 
430                                                 if (pid > 0) {
431                                                         processIDS.push_back(pid); 
432                                                         process++;
433                                                 }else if (pid == 0){
434                                                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp");   
435                                                         exit(0);
436                                                 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
437                                         }
438                                 
439                                         //force parent to wait until all the processes are done
440                                         for (int i = 0; i < processIDS.size(); i++) {
441                                                 int temp = processIDS[i];
442                                                 wait(&temp);
443                                         }
444                                         
445                                         for (int i = 0; i < processIDS.size(); i++) {
446                                                 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
447                                                 remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
448                                                 if (mult) {
449                                                         if (i == 0) {  m->appendFiles((sumAllFileName + toString(processIDS[i]) + ".temp"), sumAllFileName);  }
450                                                         remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());
451                                                 }
452                                         }
453
454                                 }
455                         #else
456                                 driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"));
457                                 m->appendFiles((sumFileName + ".temp"), sumFileName);
458                                 remove((sumFileName + ".temp").c_str());
459                                 if (mult) {
460                                         m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
461                                         remove((sumAllFileName + ".temp").c_str());
462                                 }
463                         #endif
464         }
465         catch(exception& e) {
466                 m->errorOut(e, "SummarySharedCommand", "process");
467                 exit(1);
468         }
469 }
470 /**************************************************************************************************/
471 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile) { 
472         try {
473                 
474                 //loop through calculators and add to file all for all calcs that can do mutiple groups
475                 if (mult == true) {
476                         ofstream outAll;
477                         m->openOutputFile(sumAllFile, outAll);
478                         
479                         //output label
480                         outAll << thisLookup[0]->getLabel() << '\t';
481                         
482                         //output groups names
483                         string outNames = "";
484                         for (int j = 0; j < thisLookup.size(); j++) {
485                                 outNames += thisLookup[j]->getGroup() +  "-";
486                         }
487                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
488                         outAll << outNames << '\t';
489                         
490                         for(int i=0;i<sumCalculators.size();i++){
491                                 if (sumCalculators[i]->getMultiple() == true) { 
492                                         sumCalculators[i]->getValues(thisLookup);
493                                         
494                                         if (m->control_pressed) { outAll.close(); return 1; }
495                                         
496                                         outAll << '\t';
497                                         sumCalculators[i]->print(outAll);
498                                 }
499                         }
500                         outAll << endl;
501                         outAll.close();
502                 }
503                 
504                 ofstream outputFileHandle;
505                 m->openOutputFile(sumFile, outputFileHandle);
506                 
507                 vector<SharedRAbundVector*> subset;
508                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
509
510                         for (int l = 0; l < k; l++) {
511                                 
512                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
513                                 
514                                 subset.clear(); //clear out old pair of sharedrabunds
515                                 //add new pair of sharedrabunds
516                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
517                                 
518                                 //sort groups to be alphanumeric
519                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
520                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
521                                 }else{
522                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
523                                 }
524                                 
525                                 for(int i=0;i<sumCalculators.size();i++) {
526
527                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
528                                         
529                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
530                                         
531                                         outputFileHandle << '\t';
532                                         sumCalculators[i]->print(outputFileHandle);
533                                 }
534                                 outputFileHandle << endl;
535                         }
536                 }
537                 
538                 outputFileHandle.close();
539                 
540                 return 0;
541         }
542         catch(exception& e) {
543                 m->errorOut(e, "SummarySharedCommand", "driver");
544                 exit(1);
545         }
546 }
547 /**************************************************************************************************/
548
549