]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
fixes while testing 1.33.0
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
188                         m->mothurConvert(temp, iters); 
189             
190             output = validParameter.validFile(parameters, "output", false);             
191             if(output == "not found"){  output = "lt"; }
192             else { createPhylip = true; }
193                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
194             
195             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
196                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
197             else {  
198                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
199                 else { subsample = false; }
200             }
201             
202             if (subsample == false) { iters = 0; }
203             
204             temp = validParameter.validFile(parameters, "distance", false);                                     if (temp == "not found") { temp = "false"; }
205                         createPhylip = m->isTrue(temp);
206             if (subsample) { createPhylip = true; }
207             
208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
209                         m->setProcessors(temp);
210                         m->mothurConvert(temp, processors); 
211                         
212                         if (abort == false) {
213                         
214                                 ValidCalculators validCalculator;
215                                 int i;
216                                 
217                                 for (i=0; i<Estimators.size(); i++) {
218                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
219                                                 if (Estimators[i] == "sharedsobs") { 
220                                                         sumCalculators.push_back(new SharedSobsCS());
221                                                 }else if (Estimators[i] == "sharedchao") { 
222                                                         sumCalculators.push_back(new SharedChao1());
223                                                 }else if (Estimators[i] == "sharedace") { 
224                                                         sumCalculators.push_back(new SharedAce());
225                                                 }else if (Estimators[i] == "jabund") {  
226                                                         sumCalculators.push_back(new JAbund());
227                                                 }else if (Estimators[i] == "sorabund") { 
228                                                         sumCalculators.push_back(new SorAbund());
229                                                 }else if (Estimators[i] == "jclass") { 
230                                                         sumCalculators.push_back(new Jclass());
231                                                 }else if (Estimators[i] == "sorclass") { 
232                                                         sumCalculators.push_back(new SorClass());
233                                                 }else if (Estimators[i] == "jest") { 
234                                                         sumCalculators.push_back(new Jest());
235                                                 }else if (Estimators[i] == "sorest") { 
236                                                         sumCalculators.push_back(new SorEst());
237                                                 }else if (Estimators[i] == "thetayc") { 
238                                                         sumCalculators.push_back(new ThetaYC());
239                                                 }else if (Estimators[i] == "thetan") { 
240                                                         sumCalculators.push_back(new ThetaN());
241                                                 }else if (Estimators[i] == "kstest") { 
242                                                         sumCalculators.push_back(new KSTest());
243                                                 }else if (Estimators[i] == "sharednseqs") { 
244                                                         sumCalculators.push_back(new SharedNSeqs());
245                                                 }else if (Estimators[i] == "ochiai") { 
246                                                         sumCalculators.push_back(new Ochiai());
247                                                 }else if (Estimators[i] == "anderberg") { 
248                                                         sumCalculators.push_back(new Anderberg());
249                                                 }else if (Estimators[i] == "kulczynski") { 
250                                                         sumCalculators.push_back(new Kulczynski());
251                                                 }else if (Estimators[i] == "kulczynskicody") { 
252                                                         sumCalculators.push_back(new KulczynskiCody());
253                                                 }else if (Estimators[i] == "lennon") { 
254                                                         sumCalculators.push_back(new Lennon());
255                                                 }else if (Estimators[i] == "morisitahorn") { 
256                                                         sumCalculators.push_back(new MorHorn());
257                                                 }else if (Estimators[i] == "braycurtis") { 
258                                                         sumCalculators.push_back(new BrayCurtis());
259                                                 }else if (Estimators[i] == "whittaker") { 
260                                                         sumCalculators.push_back(new Whittaker());
261                                                 }else if (Estimators[i] == "odum") { 
262                                                         sumCalculators.push_back(new Odum());
263                                                 }else if (Estimators[i] == "canberra") { 
264                                                         sumCalculators.push_back(new Canberra());
265                                                 }else if (Estimators[i] == "structeuclidean") { 
266                                                         sumCalculators.push_back(new StructEuclidean());
267                                                 }else if (Estimators[i] == "structchord") { 
268                                                         sumCalculators.push_back(new StructChord());
269                                                 }else if (Estimators[i] == "hellinger") { 
270                                                         sumCalculators.push_back(new Hellinger());
271                                                 }else if (Estimators[i] == "manhattan") { 
272                                                         sumCalculators.push_back(new Manhattan());
273                                                 }else if (Estimators[i] == "structpearson") { 
274                                                         sumCalculators.push_back(new StructPearson());
275                                                 }else if (Estimators[i] == "soergel") { 
276                                                         sumCalculators.push_back(new Soergel());
277                                                 }else if (Estimators[i] == "spearman") { 
278                                                         sumCalculators.push_back(new Spearman());
279                                                 }else if (Estimators[i] == "structkulczynski") { 
280                                                         sumCalculators.push_back(new StructKulczynski());
281                                                 }else if (Estimators[i] == "speciesprofile") { 
282                                                         sumCalculators.push_back(new SpeciesProfile());
283                                                 }else if (Estimators[i] == "hamming") { 
284                                                         sumCalculators.push_back(new Hamming());
285                                                 }else if (Estimators[i] == "structchi2") { 
286                                                         sumCalculators.push_back(new StructChi2());
287                                                 }else if (Estimators[i] == "gower") { 
288                                                         sumCalculators.push_back(new Gower());
289                                                 }else if (Estimators[i] == "memchi2") { 
290                                                         sumCalculators.push_back(new MemChi2());
291                                                 }else if (Estimators[i] == "memchord") { 
292                                                         sumCalculators.push_back(new MemChord());
293                                                 }else if (Estimators[i] == "memeuclidean") { 
294                                                         sumCalculators.push_back(new MemEuclidean());
295                                                 }else if (Estimators[i] == "mempearson") { 
296                                                         sumCalculators.push_back(new MemPearson());
297                                                 }else if (Estimators[i] == "jsd") {
298                                                         sumCalculators.push_back(new JSD());
299                                                 }else if (Estimators[i] == "rjsd") {
300                                                         sumCalculators.push_back(new RJSD());
301                                                 }
302                                         }
303                                 }
304                                 
305                                 mult = false;
306                         }
307                 }
308         }
309         catch(exception& e) {
310                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
311                 exit(1);
312         }
313 }
314 //**********************************************************************************************************************
315
316 int SummarySharedCommand::execute(){
317         try {
318         
319                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
320                 
321                 ofstream outputFileHandle, outAll;
322         map<string, string> variables; 
323                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
324                 string outputFileName = getOutputFileName("summary",variables);
325                 
326                 //if the users entered no valid calculators don't execute command
327                 if (sumCalculators.size() == 0) { return 0; }
328                 //check if any calcs can do multiples
329                 else{
330                         if (all){ 
331                                 for (int i = 0; i < sumCalculators.size(); i++) {
332                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
333                                 }
334                         }
335                 }
336                         
337                 input = new InputData(sharedfile, "sharedfile");
338                 lookup = input->getSharedRAbundVectors();
339                 string lastLabel = lookup[0]->getLabel();
340         
341                 /******************************************************/
342                 //output headings for files
343                 /******************************************************/
344                 //output estimator names as column headers
345                 m->openOutputFile(outputFileName, outputFileHandle);
346                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
347                 for(int i=0;i<sumCalculators.size();i++){
348                         outputFileHandle << '\t' << sumCalculators[i]->getName();
349                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
350                 }
351                 outputFileHandle << endl;
352                 outputFileHandle.close();
353                 
354                 //create file and put column headers for multiple groups file
355         variables["[tag]"]= "multiple";
356                 string outAllFileName = getOutputFileName("summary",variables);
357                 if (mult == true) {
358                         m->openOutputFile(outAllFileName, outAll);
359                         outputNames.push_back(outAllFileName);
360                         
361                         outAll << "label" <<'\t' << "comparison" << '\t'; 
362                         for(int i=0;i<sumCalculators.size();i++){
363                                 if (sumCalculators[i]->getMultiple() == true) { 
364                                         outAll << '\t' << sumCalculators[i]->getName();
365                                 }
366                         }
367                         outAll << endl;
368                         outAll.close();
369                 }
370                 
371                 if (lookup.size() < 2) { 
372                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
373                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
374                         
375                         //close files and clean up
376                         m->mothurRemove(outputFileName);
377                         if (mult == true) { m->mothurRemove(outAllFileName);  }
378                         return 0;
379                 //if you only have 2 groups you don't need a .sharedmultiple file
380                 }else if ((lookup.size() == 2) && (mult == true)) { 
381                         mult = false;
382                         m->mothurRemove(outAllFileName);
383                         outputNames.pop_back();
384                 }
385                 
386                 if (m->control_pressed) {
387                         if (mult) {  m->mothurRemove(outAllFileName);  }
388                         m->mothurRemove(outputFileName); 
389                         delete input;
390                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
391                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
392                         m->clearGroups(); 
393                         return 0;
394                 }
395                 /******************************************************/
396         if (subsample) { 
397             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
398                 subsampleSize = lookup[0]->getNumSeqs();
399                 for (int i = 1; i < lookup.size(); i++) {
400                     int thisSize = lookup[i]->getNumSeqs();
401                     
402                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
403                 }
404             }else {
405                 m->clearGroups();
406                 Groups.clear();
407                 vector<SharedRAbundVector*> temp;
408                 for (int i = 0; i < lookup.size(); i++) {
409                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
410                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
411                         delete lookup[i];
412                     }else { 
413                         Groups.push_back(lookup[i]->getGroup()); 
414                         temp.push_back(lookup[i]);
415                     }
416                 } 
417                 lookup = temp;
418                 m->setGroups(Groups);
419             }
420             
421             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
422         }
423
424                 
425                 /******************************************************/
426                 //comparison breakup to be used by different processes later
427                 numGroups = lookup.size();
428                 lines.resize(processors);
429                 for (int i = 0; i < processors; i++) {
430                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
431                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
432                 }               
433                 /******************************************************/
434                 
435                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
436                 set<string> processedLabels;
437                 set<string> userLabels = labels;
438                         
439                 //as long as you are not at the end of the file or done wih the lines you want
440                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
441                         if (m->control_pressed) {
442                                 if (mult) {  m->mothurRemove(outAllFileName);  }
443                                 m->mothurRemove(outputFileName); 
444                                 delete input; 
445                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
446                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
447                                 m->clearGroups(); 
448                                 return 0;
449                         }
450
451                 
452                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
453                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
454                                 process(lookup, outputFileName, outAllFileName);
455                                 
456                                 processedLabels.insert(lookup[0]->getLabel());
457                                 userLabels.erase(lookup[0]->getLabel());
458                         }
459                         
460                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
461                                         string saveLabel = lookup[0]->getLabel();
462                                         
463                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
464                                         lookup = input->getSharedRAbundVectors(lastLabel);
465
466                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
467                                         process(lookup, outputFileName, outAllFileName);
468                                         
469                                         processedLabels.insert(lookup[0]->getLabel());
470                                         userLabels.erase(lookup[0]->getLabel());
471                                         
472                                         //restore real lastlabel to save below
473                                         lookup[0]->setLabel(saveLabel);
474                         }
475                         
476                         lastLabel = lookup[0]->getLabel();                      
477                                 
478                         //get next line to process
479                         //prevent memory leak
480                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
481                         lookup = input->getSharedRAbundVectors();
482                 }
483                 
484                 if (m->control_pressed) {
485                         if (mult) { m->mothurRemove(outAllFileName);  }
486                         m->mothurRemove(outputFileName); 
487                         delete input; 
488                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
489                         m->clearGroups(); 
490                         return 0;
491                 }
492
493                 //output error messages about any remaining user labels
494                 set<string>::iterator it;
495                 bool needToRun = false;
496                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
497                         m->mothurOut("Your file does not include the label " + *it); 
498                         if (processedLabels.count(lastLabel) != 1) {
499                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
500                                 needToRun = true;
501                         }else {
502                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
503                         }
504                 }
505                 
506                 //run last label if you need to
507                 if (needToRun == true)  {
508                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
509                                 lookup = input->getSharedRAbundVectors(lastLabel);
510
511                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
512                                 process(lookup, outputFileName, outAllFileName);
513                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
514                 }
515                 
516                                 
517                 //reset groups parameter
518                 m->clearGroups();  
519                 
520                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
521                 delete input;  
522                 
523                 if (m->control_pressed) {
524                         m->mothurRemove(outAllFileName);  
525                         m->mothurRemove(outputFileName); 
526                         return 0;
527                 }
528                 
529                 m->mothurOutEndLine();
530                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
532                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
533                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
534                 m->mothurOutEndLine();
535
536                 return 0;
537         }
538         catch(exception& e) {
539                 m->errorOut(e, "SummarySharedCommand", "execute");
540                 exit(1);
541         }
542 }
543 /***********************************************************/
544 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
545         try {
546                 
547                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
548                 
549                 //output num seqs
550                 out << simMatrix.size() << endl;
551                 
552                 if (output == "lt") {
553                         for (int b = 0; b < simMatrix.size(); b++)      {
554                                 out << lookup[b]->getGroup() << '\t';
555                                 for (int n = 0; n < b; n++)     {
556                     if (m->control_pressed) { return 0; }
557                                         out << simMatrix[b][n] << '\t'; 
558                                 }
559                                 out << endl;
560                         }
561                 }else{
562                         for (int b = 0; b < simMatrix.size(); m++)      {
563                                 out << lookup[b]->getGroup() << '\t';
564                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
565                     if (m->control_pressed) { return 0; }
566                                         out << simMatrix[b][n] << '\t'; 
567                                 }
568                                 out << endl;
569                         }
570                 }
571         
572         return 0;
573         }
574         catch(exception& e) {
575                 m->errorOut(e, "SummarySharedCommand", "printSims");
576                 exit(1);
577         }
578 }
579 /***********************************************************/
580 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
581         try {
582         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
583         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
584         
585         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
586             
587             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
588             
589             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
590                 SubSample sample;
591                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
592                 
593                 //make copy of lookup so we don't get access violations
594                 vector<SharedRAbundVector*> newLookup;
595                 for (int k = 0; k < thisItersLookup.size(); k++) {
596                     SharedRAbundVector* temp = new SharedRAbundVector();
597                     temp->setLabel(thisItersLookup[k]->getLabel());
598                     temp->setGroup(thisItersLookup[k]->getGroup());
599                     newLookup.push_back(temp);
600                 }
601                 
602                 //for each bin
603                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
604                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
605                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
606                 }
607                 
608                 tempLabels = sample.getSample(newLookup, subsampleSize);
609                 thisItersLookup = newLookup;
610             }
611         
612             
613             if(processors == 1){
614                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
615                 m->appendFiles((sumFileName + ".temp"), sumFileName);
616                 m->mothurRemove((sumFileName + ".temp"));
617                 if (mult) {
618                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
619                     m->mothurRemove((sumAllFileName + ".temp"));
620                 }
621             }else{
622                 
623                 int process = 1;
624                 vector<int> processIDS;
625                 
626 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
627                 //loop through and create all the processes you want
628                 while (process != processors) {
629                     int pid = fork();
630                     
631                     if (pid > 0) {
632                         processIDS.push_back(pid); 
633                         process++;
634                     }else if (pid == 0){
635                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
636                         
637                         //only do this if you want a distance file
638                         if (createPhylip) {
639                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
640                             ofstream outtemp;
641                             m->openOutputFile(tempdistFileName, outtemp);
642                             
643                             for (int i = 0; i < calcDists.size(); i++) {
644                                 outtemp << calcDists[i].size() << endl;
645                                 
646                                 for (int j = 0; j < calcDists[i].size(); j++) {
647                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
648                                 }
649                             }
650                             outtemp.close();
651                         }
652                         
653                         exit(0);
654                     }else { 
655                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
656                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
657                         exit(0);
658                     }
659                 }
660                 
661                 //parent do your part
662                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
663                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
664                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
665                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
666                 
667                 //force parent to wait until all the processes are done
668                 for (int i = 0; i < processIDS.size(); i++) {
669                     int temp = processIDS[i];
670                     wait(&temp);
671                 }
672                 
673                 for (int i = 0; i < processIDS.size(); i++) {
674                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
675                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
676                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
677                     
678                     if (createPhylip) {
679                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
680                         ifstream intemp;
681                         m->openInputFile(tempdistFileName, intemp);
682                         
683                         for (int k = 0; k < calcDists.size(); k++) {
684                             int size = 0;
685                             intemp >> size; m->gobble(intemp);
686                             
687                             for (int j = 0; j < size; j++) {
688                                 int seq1 = 0;
689                                 int seq2 = 0;
690                                 float dist = 1.0;
691                                 
692                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
693                                 
694                                 seqDist tempDist(seq1, seq2, dist);
695                                 calcDists[k].push_back(tempDist);
696                             }
697                         }
698                         intemp.close();
699                         m->mothurRemove(tempdistFileName);
700                     }
701                 }
702 #else
703                 //////////////////////////////////////////////////////////////////////////////////////////////////////
704                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
705                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
706                 //Taking advantage of shared memory to pass results vectors.
707                 //////////////////////////////////////////////////////////////////////////////////////////////////////
708                 
709                 vector<summarySharedData*> pDataArray; 
710                 DWORD   dwThreadIdArray[processors-1];
711                 HANDLE  hThreadArray[processors-1];
712                 
713                 //Create processor worker threads.
714                 for( int i=1; i<processors; i++ ){
715                     
716                     //make copy of lookup so we don't get access violations
717                     vector<SharedRAbundVector*> newLookup;
718                     for (int k = 0; k < thisLookup.size(); k++) {
719                         SharedRAbundVector* temp = new SharedRAbundVector();
720                         temp->setLabel(thisLookup[k]->getLabel());
721                         temp->setGroup(thisLookup[k]->getGroup());
722                         newLookup.push_back(temp);
723                     }
724                 
725                     
726                     //for each bin
727                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
728                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
729                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
730                     }
731                     
732                     // Allocate memory for thread data.
733                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
734                     pDataArray.push_back(tempSum);
735                     processIDS.push_back(i);
736                     
737                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
738                 }
739                 
740                 //parent do your part
741                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
742                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
743                 m->mothurRemove((sumFileName + "0.temp"));
744                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
745                 
746                 //Wait until all threads have terminated.
747                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
748                 
749                 //Close all thread handles and free memory allocations.
750                 for(int i=0; i < pDataArray.size(); i++){
751                     if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
752                         m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
753                     }
754                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
755                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
756                     
757                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
758                     
759                     if (createPhylip) {
760                         for (int k = 0; k < calcDists.size(); k++) {
761                             int size = pDataArray[i]->calcDists[k].size();
762                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
763                         }
764                     }
765                     
766                     CloseHandle(hThreadArray[i]);
767                     delete pDataArray[i];
768                 }
769                 
770 #endif
771             }
772             
773             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
774                 
775                 calcDistsTotals.push_back(calcDists); 
776                 //clean up memory
777                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
778                 thisItersLookup.clear();
779             }else {
780                 if (createPhylip) {
781                     for (int i = 0; i < calcDists.size(); i++) {
782                         if (m->control_pressed) { break; }
783                         
784                         //initialize matrix
785                         vector< vector<double> > matrix; //square matrix to represent the distance
786                         matrix.resize(thisLookup.size());
787                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
788                         
789                         for (int j = 0; j < calcDists[i].size(); j++) {
790                             int row = calcDists[i][j].seq1;
791                             int column = calcDists[i][j].seq2;
792                             double dist = calcDists[i][j].dist;
793                             
794                             matrix[row][column] = dist;
795                             matrix[column][row] = dist;
796                         }
797                         
798                         map<string, string> variables; 
799                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
800                         variables["[calc]"] = sumCalculators[i]->getName();
801                         variables["[distance]"] = thisLookup[0]->getLabel();
802                         variables["[outputtag]"] = output;
803                         variables["[tag2]"] = "";
804                         string distFileName = getOutputFileName("phylip",variables);
805                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
806                         ofstream outDist;
807                         m->openOutputFile(distFileName, outDist);
808                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
809                         
810                         printSims(outDist, matrix);
811                         
812                         outDist.close();
813                     }
814                 }
815             }
816             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
817                 }
818
819         if (iters != 0) {
820             //we need to find the average distance and standard deviation for each groups distance
821             vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
822             
823             //find standard deviation
824             vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
825             
826             //print results
827             for (int i = 0; i < calcDists.size(); i++) {
828                 vector< vector<double> > matrix; //square matrix to represent the distance
829                 matrix.resize(thisLookup.size());
830                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
831                 
832                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
833                 stdmatrix.resize(thisLookup.size());
834                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
835                 
836                 
837                 for (int j = 0; j < calcAverages[i].size(); j++) {
838                     int row = calcAverages[i][j].seq1;
839                     int column = calcAverages[i][j].seq2;
840                     float dist = calcAverages[i][j].dist;
841                     float stdDist = stdDev[i][j].dist;
842                     
843                     matrix[row][column] = dist;
844                     matrix[column][row] = dist;
845                     stdmatrix[row][column] = stdDist;
846                     stdmatrix[column][row] = stdDist;
847                 }
848                 
849                 map<string, string> variables; 
850                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
851                 variables["[calc]"] = sumCalculators[i]->getName();
852                 variables["[distance]"] = thisLookup[0]->getLabel();
853                 variables["[outputtag]"] = output;
854                 variables["[tag2]"] = "ave";
855                 string distFileName = getOutputFileName("phylip",variables);
856                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
857                 ofstream outAve;
858                 m->openOutputFile(distFileName, outAve);
859                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
860                 
861                 printSims(outAve, matrix);
862                 
863                 outAve.close();
864                 
865                 variables["[tag2]"] = "std";
866                 distFileName = getOutputFileName("phylip",variables);
867                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
868                 ofstream outSTD;
869                 m->openOutputFile(distFileName, outSTD);
870                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
871                 
872                 printSims(outSTD, stdmatrix);
873                 
874                 outSTD.close();
875                 
876             }
877         }
878         
879         return 0;
880         }
881         catch(exception& e) {
882                 m->errorOut(e, "SummarySharedCommand", "process");
883                 exit(1);
884         }
885 }
886 /**************************************************************************************************/
887 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
888         try {
889                 
890                 //loop through calculators and add to file all for all calcs that can do mutiple groups
891                 if (mult == true) {
892                         ofstream outAll;
893                         m->openOutputFile(sumAllFile, outAll);
894                         
895                         //output label
896                         outAll << thisLookup[0]->getLabel() << '\t';
897                         
898                         //output groups names
899                         string outNames = "";
900                         for (int j = 0; j < thisLookup.size(); j++) {
901                                 outNames += thisLookup[j]->getGroup() +  "-";
902                         }
903                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
904                         outAll << outNames << '\t';
905                         
906                         for(int i=0;i<sumCalculators.size();i++){
907                                 if (sumCalculators[i]->getMultiple() == true) { 
908                                         sumCalculators[i]->getValues(thisLookup);
909                                         
910                                         if (m->control_pressed) { outAll.close(); return 1; }
911                                         
912                                         outAll << '\t';
913                                         sumCalculators[i]->print(outAll);
914                                 }
915                         }
916                         outAll << endl;
917                         outAll.close();
918                 }
919                 
920                 ofstream outputFileHandle;
921                 m->openOutputFile(sumFile, outputFileHandle);
922                 
923                 vector<SharedRAbundVector*> subset;
924                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
925
926                         for (int l = 0; l < k; l++) {
927                                 
928                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
929                                 
930                                 subset.clear(); //clear out old pair of sharedrabunds
931                                 //add new pair of sharedrabunds
932                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
933                                 
934                                 //sort groups to be alphanumeric
935                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
936                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
937                                 }else{
938                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
939                                 }
940                                 
941                                 for(int i=0;i<sumCalculators.size();i++) {
942                                         
943                                         //if this calc needs all groups to calculate the pair load all groups
944                                         if (sumCalculators[i]->getNeedsAll()) { 
945                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
946                                                 for (int w = 0; w < thisLookup.size(); w++) {
947                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
948                                                 }
949                                         }
950                                         
951                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
952                                         
953                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
954                                         
955                                         outputFileHandle << '\t';
956                                         sumCalculators[i]->print(outputFileHandle);
957                                         
958                                         seqDist temp(l, k, tempdata[0]);
959                                         calcDists[i].push_back(temp);
960                                 }
961                                 outputFileHandle << endl;
962                         }
963                 }
964                 
965                 outputFileHandle.close();
966                 
967                 return 0;
968         }
969         catch(exception& e) {
970                 m->errorOut(e, "SummarySharedCommand", "driver");
971                 exit(1);
972         }
973 }
974 /**************************************************************************************************/
975
976