]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
2c9399bda1dd363d04c1fed89ff9b12ea8c3aace
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
188                         m->mothurConvert(temp, iters); 
189             
190             output = validParameter.validFile(parameters, "output", false);             
191             if(output == "not found"){  output = "lt"; }
192             else { createPhylip = true; }
193                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
194             
195             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
196                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
197             else {  
198                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
199                 else { subsample = false; }
200             }
201             
202             if (subsample == false) { iters = 0; }
203             
204             temp = validParameter.validFile(parameters, "distance", false);                                     if (temp == "not found") { temp = "false"; }
205                         createPhylip = m->isTrue(temp);
206             if (subsample) { createPhylip = true; }
207             
208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
209                         m->setProcessors(temp);
210                         m->mothurConvert(temp, processors); 
211                         
212                         if (abort == false) {
213                         
214                                 ValidCalculators validCalculator;
215                                 int i;
216                                 
217                                 for (i=0; i<Estimators.size(); i++) {
218                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
219                                                 if (Estimators[i] == "sharedsobs") { 
220                                                         sumCalculators.push_back(new SharedSobsCS());
221                                                 }else if (Estimators[i] == "sharedchao") { 
222                                                         sumCalculators.push_back(new SharedChao1());
223                                                 }else if (Estimators[i] == "sharedace") { 
224                                                         sumCalculators.push_back(new SharedAce());
225                                                 }else if (Estimators[i] == "jabund") {  
226                                                         sumCalculators.push_back(new JAbund());
227                                                 }else if (Estimators[i] == "sorabund") { 
228                                                         sumCalculators.push_back(new SorAbund());
229                                                 }else if (Estimators[i] == "jclass") { 
230                                                         sumCalculators.push_back(new Jclass());
231                                                 }else if (Estimators[i] == "sorclass") { 
232                                                         sumCalculators.push_back(new SorClass());
233                                                 }else if (Estimators[i] == "jest") { 
234                                                         sumCalculators.push_back(new Jest());
235                                                 }else if (Estimators[i] == "sorest") { 
236                                                         sumCalculators.push_back(new SorEst());
237                                                 }else if (Estimators[i] == "thetayc") { 
238                                                         sumCalculators.push_back(new ThetaYC());
239                                                 }else if (Estimators[i] == "thetan") { 
240                                                         sumCalculators.push_back(new ThetaN());
241                                                 }else if (Estimators[i] == "kstest") { 
242                                                         sumCalculators.push_back(new KSTest());
243                                                 }else if (Estimators[i] == "sharednseqs") { 
244                                                         sumCalculators.push_back(new SharedNSeqs());
245                                                 }else if (Estimators[i] == "ochiai") { 
246                                                         sumCalculators.push_back(new Ochiai());
247                                                 }else if (Estimators[i] == "anderberg") { 
248                                                         sumCalculators.push_back(new Anderberg());
249                                                 }else if (Estimators[i] == "kulczynski") { 
250                                                         sumCalculators.push_back(new Kulczynski());
251                                                 }else if (Estimators[i] == "kulczynskicody") { 
252                                                         sumCalculators.push_back(new KulczynskiCody());
253                                                 }else if (Estimators[i] == "lennon") { 
254                                                         sumCalculators.push_back(new Lennon());
255                                                 }else if (Estimators[i] == "morisitahorn") { 
256                                                         sumCalculators.push_back(new MorHorn());
257                                                 }else if (Estimators[i] == "braycurtis") { 
258                                                         sumCalculators.push_back(new BrayCurtis());
259                                                 }else if (Estimators[i] == "whittaker") { 
260                                                         sumCalculators.push_back(new Whittaker());
261                                                 }else if (Estimators[i] == "odum") { 
262                                                         sumCalculators.push_back(new Odum());
263                                                 }else if (Estimators[i] == "canberra") { 
264                                                         sumCalculators.push_back(new Canberra());
265                                                 }else if (Estimators[i] == "structeuclidean") { 
266                                                         sumCalculators.push_back(new StructEuclidean());
267                                                 }else if (Estimators[i] == "structchord") { 
268                                                         sumCalculators.push_back(new StructChord());
269                                                 }else if (Estimators[i] == "hellinger") { 
270                                                         sumCalculators.push_back(new Hellinger());
271                                                 }else if (Estimators[i] == "manhattan") { 
272                                                         sumCalculators.push_back(new Manhattan());
273                                                 }else if (Estimators[i] == "structpearson") { 
274                                                         sumCalculators.push_back(new StructPearson());
275                                                 }else if (Estimators[i] == "soergel") { 
276                                                         sumCalculators.push_back(new Soergel());
277                                                 }else if (Estimators[i] == "spearman") { 
278                                                         sumCalculators.push_back(new Spearman());
279                                                 }else if (Estimators[i] == "structkulczynski") { 
280                                                         sumCalculators.push_back(new StructKulczynski());
281                                                 }else if (Estimators[i] == "speciesprofile") { 
282                                                         sumCalculators.push_back(new SpeciesProfile());
283                                                 }else if (Estimators[i] == "hamming") { 
284                                                         sumCalculators.push_back(new Hamming());
285                                                 }else if (Estimators[i] == "structchi2") { 
286                                                         sumCalculators.push_back(new StructChi2());
287                                                 }else if (Estimators[i] == "gower") { 
288                                                         sumCalculators.push_back(new Gower());
289                                                 }else if (Estimators[i] == "memchi2") { 
290                                                         sumCalculators.push_back(new MemChi2());
291                                                 }else if (Estimators[i] == "memchord") { 
292                                                         sumCalculators.push_back(new MemChord());
293                                                 }else if (Estimators[i] == "memeuclidean") { 
294                                                         sumCalculators.push_back(new MemEuclidean());
295                                                 }else if (Estimators[i] == "mempearson") { 
296                                                         sumCalculators.push_back(new MemPearson());
297                                                 }
298                                         }
299                                 }
300                                 
301                                 mult = false;
302                         }
303                 }
304         }
305         catch(exception& e) {
306                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
307                 exit(1);
308         }
309 }
310 //**********************************************************************************************************************
311
312 int SummarySharedCommand::execute(){
313         try {
314         
315                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
316                 
317                 ofstream outputFileHandle, outAll;
318         map<string, string> variables; 
319                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
320                 string outputFileName = getOutputFileName("summary",variables);
321                 
322                 //if the users entered no valid calculators don't execute command
323                 if (sumCalculators.size() == 0) { return 0; }
324                 //check if any calcs can do multiples
325                 else{
326                         if (all){ 
327                                 for (int i = 0; i < sumCalculators.size(); i++) {
328                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
329                                 }
330                         }
331                 }
332                         
333                 input = new InputData(sharedfile, "sharedfile");
334                 lookup = input->getSharedRAbundVectors();
335                 string lastLabel = lookup[0]->getLabel();
336         
337                 /******************************************************/
338                 //output headings for files
339                 /******************************************************/
340                 //output estimator names as column headers
341                 m->openOutputFile(outputFileName, outputFileHandle);
342                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
343                 for(int i=0;i<sumCalculators.size();i++){
344                         outputFileHandle << '\t' << sumCalculators[i]->getName();
345                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
346                 }
347                 outputFileHandle << endl;
348                 outputFileHandle.close();
349                 
350                 //create file and put column headers for multiple groups file
351         variables["[tag]"]= "multiple";
352                 string outAllFileName = getOutputFileName("summary",variables);
353                 if (mult == true) {
354                         m->openOutputFile(outAllFileName, outAll);
355                         outputNames.push_back(outAllFileName);
356                         
357                         outAll << "label" <<'\t' << "comparison" << '\t'; 
358                         for(int i=0;i<sumCalculators.size();i++){
359                                 if (sumCalculators[i]->getMultiple() == true) { 
360                                         outAll << '\t' << sumCalculators[i]->getName();
361                                 }
362                         }
363                         outAll << endl;
364                         outAll.close();
365                 }
366                 
367                 if (lookup.size() < 2) { 
368                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
369                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
370                         
371                         //close files and clean up
372                         m->mothurRemove(outputFileName);
373                         if (mult == true) { m->mothurRemove(outAllFileName);  }
374                         return 0;
375                 //if you only have 2 groups you don't need a .sharedmultiple file
376                 }else if ((lookup.size() == 2) && (mult == true)) { 
377                         mult = false;
378                         m->mothurRemove(outAllFileName);
379                         outputNames.pop_back();
380                 }
381                 
382                 if (m->control_pressed) {
383                         if (mult) {  m->mothurRemove(outAllFileName);  }
384                         m->mothurRemove(outputFileName); 
385                         delete input;
386                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
387                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
388                         m->clearGroups(); 
389                         return 0;
390                 }
391                 /******************************************************/
392         if (subsample) { 
393             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
394                 subsampleSize = lookup[0]->getNumSeqs();
395                 for (int i = 1; i < lookup.size(); i++) {
396                     int thisSize = lookup[i]->getNumSeqs();
397                     
398                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
399                 }
400             }else {
401                 m->clearGroups();
402                 Groups.clear();
403                 vector<SharedRAbundVector*> temp;
404                 for (int i = 0; i < lookup.size(); i++) {
405                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
406                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
407                         delete lookup[i];
408                     }else { 
409                         Groups.push_back(lookup[i]->getGroup()); 
410                         temp.push_back(lookup[i]);
411                     }
412                 } 
413                 lookup = temp;
414                 m->setGroups(Groups);
415             }
416             
417             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
418         }
419
420                 
421                 /******************************************************/
422                 //comparison breakup to be used by different processes later
423                 numGroups = lookup.size();
424                 lines.resize(processors);
425                 for (int i = 0; i < processors; i++) {
426                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
427                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
428                 }               
429                 /******************************************************/
430                 
431                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
432                 set<string> processedLabels;
433                 set<string> userLabels = labels;
434                         
435                 //as long as you are not at the end of the file or done wih the lines you want
436                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
437                         if (m->control_pressed) {
438                                 if (mult) {  m->mothurRemove(outAllFileName);  }
439                                 m->mothurRemove(outputFileName); 
440                                 delete input; 
441                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
442                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
443                                 m->clearGroups(); 
444                                 return 0;
445                         }
446
447                 
448                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
449                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
450                                 process(lookup, outputFileName, outAllFileName);
451                                 
452                                 processedLabels.insert(lookup[0]->getLabel());
453                                 userLabels.erase(lookup[0]->getLabel());
454                         }
455                         
456                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
457                                         string saveLabel = lookup[0]->getLabel();
458                                         
459                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
460                                         lookup = input->getSharedRAbundVectors(lastLabel);
461
462                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
463                                         process(lookup, outputFileName, outAllFileName);
464                                         
465                                         processedLabels.insert(lookup[0]->getLabel());
466                                         userLabels.erase(lookup[0]->getLabel());
467                                         
468                                         //restore real lastlabel to save below
469                                         lookup[0]->setLabel(saveLabel);
470                         }
471                         
472                         lastLabel = lookup[0]->getLabel();                      
473                                 
474                         //get next line to process
475                         //prevent memory leak
476                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
477                         lookup = input->getSharedRAbundVectors();
478                 }
479                 
480                 if (m->control_pressed) {
481                         if (mult) { m->mothurRemove(outAllFileName);  }
482                         m->mothurRemove(outputFileName); 
483                         delete input; 
484                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
485                         m->clearGroups(); 
486                         return 0;
487                 }
488
489                 //output error messages about any remaining user labels
490                 set<string>::iterator it;
491                 bool needToRun = false;
492                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
493                         m->mothurOut("Your file does not include the label " + *it); 
494                         if (processedLabels.count(lastLabel) != 1) {
495                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
496                                 needToRun = true;
497                         }else {
498                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
499                         }
500                 }
501                 
502                 //run last label if you need to
503                 if (needToRun == true)  {
504                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
505                                 lookup = input->getSharedRAbundVectors(lastLabel);
506
507                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
508                                 process(lookup, outputFileName, outAllFileName);
509                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
510                 }
511                 
512                                 
513                 //reset groups parameter
514                 m->clearGroups();  
515                 
516                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
517                 delete input;  
518                 
519                 if (m->control_pressed) {
520                         m->mothurRemove(outAllFileName);  
521                         m->mothurRemove(outputFileName); 
522                         return 0;
523                 }
524                 
525                 m->mothurOutEndLine();
526                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
527                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
528                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
529                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
530                 m->mothurOutEndLine();
531
532                 return 0;
533         }
534         catch(exception& e) {
535                 m->errorOut(e, "SummarySharedCommand", "execute");
536                 exit(1);
537         }
538 }
539 /***********************************************************/
540 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
541         try {
542                 
543                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
544                 
545                 //output num seqs
546                 out << simMatrix.size() << endl;
547                 
548                 if (output == "lt") {
549                         for (int b = 0; b < simMatrix.size(); b++)      {
550                                 out << lookup[b]->getGroup() << '\t';
551                                 for (int n = 0; n < b; n++)     {
552                     if (m->control_pressed) { return 0; }
553                                         out << simMatrix[b][n] << '\t'; 
554                                 }
555                                 out << endl;
556                         }
557                 }else{
558                         for (int b = 0; b < simMatrix.size(); m++)      {
559                                 out << lookup[b]->getGroup() << '\t';
560                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
561                     if (m->control_pressed) { return 0; }
562                                         out << simMatrix[b][n] << '\t'; 
563                                 }
564                                 out << endl;
565                         }
566                 }
567         
568         return 0;
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "SummarySharedCommand", "printSims");
572                 exit(1);
573         }
574 }
575 /***********************************************************/
576 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
577         try {
578         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
579         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
580         
581         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
582             
583             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
584             
585             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
586                 SubSample sample;
587                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
588                 
589                 //make copy of lookup so we don't get access violations
590                 vector<SharedRAbundVector*> newLookup;
591                 for (int k = 0; k < thisItersLookup.size(); k++) {
592                     SharedRAbundVector* temp = new SharedRAbundVector();
593                     temp->setLabel(thisItersLookup[k]->getLabel());
594                     temp->setGroup(thisItersLookup[k]->getGroup());
595                     newLookup.push_back(temp);
596                 }
597                 
598                 //for each bin
599                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
600                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
601                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
602                 }
603                 
604                 tempLabels = sample.getSample(newLookup, subsampleSize);
605                 thisItersLookup = newLookup;
606             }
607         
608             
609             if(processors == 1){
610                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
611                 m->appendFiles((sumFileName + ".temp"), sumFileName);
612                 m->mothurRemove((sumFileName + ".temp"));
613                 if (mult) {
614                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
615                     m->mothurRemove((sumAllFileName + ".temp"));
616                 }
617             }else{
618                 
619                 int process = 1;
620                 vector<int> processIDS;
621                 
622 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
623                 //loop through and create all the processes you want
624                 while (process != processors) {
625                     int pid = fork();
626                     
627                     if (pid > 0) {
628                         processIDS.push_back(pid); 
629                         process++;
630                     }else if (pid == 0){
631                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
632                         
633                         //only do this if you want a distance file
634                         if (createPhylip) {
635                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
636                             ofstream outtemp;
637                             m->openOutputFile(tempdistFileName, outtemp);
638                             
639                             for (int i = 0; i < calcDists.size(); i++) {
640                                 outtemp << calcDists[i].size() << endl;
641                                 
642                                 for (int j = 0; j < calcDists[i].size(); j++) {
643                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
644                                 }
645                             }
646                             outtemp.close();
647                         }
648                         
649                         exit(0);
650                     }else { 
651                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
652                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
653                         exit(0);
654                     }
655                 }
656                 
657                 //parent do your part
658                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
659                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
660                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
661                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
662                 
663                 //force parent to wait until all the processes are done
664                 for (int i = 0; i < processIDS.size(); i++) {
665                     int temp = processIDS[i];
666                     wait(&temp);
667                 }
668                 
669                 for (int i = 0; i < processIDS.size(); i++) {
670                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
671                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
672                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
673                     
674                     if (createPhylip) {
675                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
676                         ifstream intemp;
677                         m->openInputFile(tempdistFileName, intemp);
678                         
679                         for (int k = 0; k < calcDists.size(); k++) {
680                             int size = 0;
681                             intemp >> size; m->gobble(intemp);
682                             
683                             for (int j = 0; j < size; j++) {
684                                 int seq1 = 0;
685                                 int seq2 = 0;
686                                 float dist = 1.0;
687                                 
688                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
689                                 
690                                 seqDist tempDist(seq1, seq2, dist);
691                                 calcDists[k].push_back(tempDist);
692                             }
693                         }
694                         intemp.close();
695                         m->mothurRemove(tempdistFileName);
696                     }
697                 }
698 #else
699                 //////////////////////////////////////////////////////////////////////////////////////////////////////
700                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
701                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
702                 //Taking advantage of shared memory to pass results vectors.
703                 //////////////////////////////////////////////////////////////////////////////////////////////////////
704                 
705                 vector<summarySharedData*> pDataArray; 
706                 DWORD   dwThreadIdArray[processors-1];
707                 HANDLE  hThreadArray[processors-1];
708                 
709                 //Create processor worker threads.
710                 for( int i=1; i<processors; i++ ){
711                     
712                     //make copy of lookup so we don't get access violations
713                     vector<SharedRAbundVector*> newLookup;
714                     for (int k = 0; k < thisLookup.size(); k++) {
715                         SharedRAbundVector* temp = new SharedRAbundVector();
716                         temp->setLabel(thisLookup[k]->getLabel());
717                         temp->setGroup(thisLookup[k]->getGroup());
718                         newLookup.push_back(temp);
719                     }
720                 
721                     
722                     //for each bin
723                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
724                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
725                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
726                     }
727                     
728                     // Allocate memory for thread data.
729                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
730                     pDataArray.push_back(tempSum);
731                     processIDS.push_back(i);
732                     
733                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
734                 }
735                 
736                 //parent do your part
737                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
738                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
739                 m->mothurRemove((sumFileName + "0.temp"));
740                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
741                 
742                 //Wait until all threads have terminated.
743                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
744                 
745                 //Close all thread handles and free memory allocations.
746                 for(int i=0; i < pDataArray.size(); i++){
747                     if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
748                         m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
749                     }
750                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
751                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
752                     
753                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
754                     
755                     if (createPhylip) {
756                         for (int k = 0; k < calcDists.size(); k++) {
757                             int size = pDataArray[i]->calcDists[k].size();
758                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
759                         }
760                     }
761                     
762                     CloseHandle(hThreadArray[i]);
763                     delete pDataArray[i];
764                 }
765                 
766 #endif
767             }
768             
769             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
770                 
771                 calcDistsTotals.push_back(calcDists); 
772                 //clean up memory
773                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
774                 thisItersLookup.clear();
775             }else {
776                 if (createPhylip) {
777                     for (int i = 0; i < calcDists.size(); i++) {
778                         if (m->control_pressed) { break; }
779                         
780                         //initialize matrix
781                         vector< vector<double> > matrix; //square matrix to represent the distance
782                         matrix.resize(thisLookup.size());
783                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
784                         
785                         for (int j = 0; j < calcDists[i].size(); j++) {
786                             int row = calcDists[i][j].seq1;
787                             int column = calcDists[i][j].seq2;
788                             double dist = calcDists[i][j].dist;
789                             
790                             matrix[row][column] = dist;
791                             matrix[column][row] = dist;
792                         }
793                         
794                         map<string, string> variables; 
795                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
796                         variables["[calc]"] = sumCalculators[i]->getName();
797                         variables["[distance]"] = thisLookup[0]->getLabel();
798                         variables["[outputtag]"] = output;
799                         variables["[tag2]"] = "";
800                         string distFileName = getOutputFileName("phylip",variables);
801                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
802                         ofstream outDist;
803                         m->openOutputFile(distFileName, outDist);
804                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
805                         
806                         printSims(outDist, matrix);
807                         
808                         outDist.close();
809                     }
810                 }
811             }
812             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
813                 }
814
815         if (iters != 0) {
816             //we need to find the average distance and standard deviation for each groups distance
817             vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
818             
819             //find standard deviation
820             vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
821             
822             //print results
823             for (int i = 0; i < calcDists.size(); i++) {
824                 vector< vector<double> > matrix; //square matrix to represent the distance
825                 matrix.resize(thisLookup.size());
826                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
827                 
828                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
829                 stdmatrix.resize(thisLookup.size());
830                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
831                 
832                 
833                 for (int j = 0; j < calcAverages[i].size(); j++) {
834                     int row = calcAverages[i][j].seq1;
835                     int column = calcAverages[i][j].seq2;
836                     float dist = calcAverages[i][j].dist;
837                     float stdDist = stdDev[i][j].dist;
838                     
839                     matrix[row][column] = dist;
840                     matrix[column][row] = dist;
841                     stdmatrix[row][column] = stdDist;
842                     stdmatrix[column][row] = stdDist;
843                 }
844                 
845                 map<string, string> variables; 
846                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
847                 variables["[calc]"] = sumCalculators[i]->getName();
848                 variables["[distance]"] = thisLookup[0]->getLabel();
849                 variables["[outputtag]"] = output;
850                 variables["[tag2]"] = "ave";
851                 string distFileName = getOutputFileName("phylip",variables);
852                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
853                 ofstream outAve;
854                 m->openOutputFile(distFileName, outAve);
855                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
856                 
857                 printSims(outAve, matrix);
858                 
859                 outAve.close();
860                 
861                 variables["[tag2]"] = "std";
862                 distFileName = getOutputFileName("phylip",variables);
863                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
864                 ofstream outSTD;
865                 m->openOutputFile(distFileName, outSTD);
866                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
867                 
868                 printSims(outSTD, stdmatrix);
869                 
870                 outSTD.close();
871                 
872             }
873         }
874         
875         return 0;
876         }
877         catch(exception& e) {
878                 m->errorOut(e, "SummarySharedCommand", "process");
879                 exit(1);
880         }
881 }
882 /**************************************************************************************************/
883 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
884         try {
885                 
886                 //loop through calculators and add to file all for all calcs that can do mutiple groups
887                 if (mult == true) {
888                         ofstream outAll;
889                         m->openOutputFile(sumAllFile, outAll);
890                         
891                         //output label
892                         outAll << thisLookup[0]->getLabel() << '\t';
893                         
894                         //output groups names
895                         string outNames = "";
896                         for (int j = 0; j < thisLookup.size(); j++) {
897                                 outNames += thisLookup[j]->getGroup() +  "-";
898                         }
899                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
900                         outAll << outNames << '\t';
901                         
902                         for(int i=0;i<sumCalculators.size();i++){
903                                 if (sumCalculators[i]->getMultiple() == true) { 
904                                         sumCalculators[i]->getValues(thisLookup);
905                                         
906                                         if (m->control_pressed) { outAll.close(); return 1; }
907                                         
908                                         outAll << '\t';
909                                         sumCalculators[i]->print(outAll);
910                                 }
911                         }
912                         outAll << endl;
913                         outAll.close();
914                 }
915                 
916                 ofstream outputFileHandle;
917                 m->openOutputFile(sumFile, outputFileHandle);
918                 
919                 vector<SharedRAbundVector*> subset;
920                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
921
922                         for (int l = 0; l < k; l++) {
923                                 
924                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
925                                 
926                                 subset.clear(); //clear out old pair of sharedrabunds
927                                 //add new pair of sharedrabunds
928                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
929                                 
930                                 //sort groups to be alphanumeric
931                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
932                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
933                                 }else{
934                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
935                                 }
936                                 
937                                 for(int i=0;i<sumCalculators.size();i++) {
938                                         
939                                         //if this calc needs all groups to calculate the pair load all groups
940                                         if (sumCalculators[i]->getNeedsAll()) { 
941                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
942                                                 for (int w = 0; w < thisLookup.size(); w++) {
943                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
944                                                 }
945                                         }
946                                         
947                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
948                                         
949                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
950                                         
951                                         outputFileHandle << '\t';
952                                         sumCalculators[i]->print(outputFileHandle);
953                                         
954                                         seqDist temp(l, k, tempdata[0]);
955                                         calcDists[i].push_back(temp);
956                                 }
957                                 outputFileHandle << endl;
958                         }
959                 }
960                 
961                 outputFileHandle.close();
962                 
963                 return 0;
964         }
965         catch(exception& e) {
966                 m->errorOut(e, "SummarySharedCommand", "driver");
967                 exit(1);
968         }
969 }
970 /**************************************************************************************************/
971
972