]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
changes while testing 1.26
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputFileNameTag(string type, string inputName=""){    
68         try {
69         string outputFileName = "";
70                 map<string, vector<string> >::iterator it;
71         
72         //is this a type this command creates
73         it = outputTypes.find(type);
74         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75         else {
76             if (type == "summary")            {   outputFileName =  "shared.summary";   }
77             else if (type == "phylip")            {   outputFileName =  "dist";   }
78             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
79         }
80         return outputFileName;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "SummarySharedCommand", "getOutputFileNameTag");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 SummarySharedCommand::SummarySharedCommand(){   
89         try {
90                 abort = true; calledHelp = true; 
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["summary"] = tempOutNames;
94         outputTypes["phylip"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102
103 SummarySharedCommand::SummarySharedCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 allLines = 1;
107                                 
108                 //allow user to run help
109                 if(option == "help") {  help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string, string> parameters = parser.getParameters();
117                         map<string, string>::iterator it;
118                         
119                         ValidParameters validParameter;
120                 
121                         //check to make sure all parameters are valid for command
122                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["summary"] = tempOutNames;
129             outputTypes["phylip"] = tempOutNames;
130                         
131                         //if the user changes the input directory command factory will send this info to us in the output parameter 
132                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
133                         if (inputDir == "not found"){   inputDir = "";          }
134                         else {
135                                 string path;
136                                 it = parameters.find("shared");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
142                                 }
143                         }
144                         
145                         //get shared file
146                         sharedfile = validParameter.validFile(parameters, "shared", true);
147                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
148                         else if (sharedfile == "not found") { 
149                                 //if there is a current shared file, use it
150                                 sharedfile = m->getSharedFile(); 
151                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
152                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
153                         }else { m->setSharedFile(sharedfile); }
154                         
155                         
156                         //if the user changes the output directory command factory will send this info to us in the output parameter 
157                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
158                         
159
160                         //check for optional parameter and set defaults
161                         // ...at some point should added some additional type checking...
162                         label = validParameter.validFile(parameters, "label", false);                   
163                         if (label == "not found") { label = ""; }
164                         else { 
165                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
166                                 else { allLines = 1;  }
167                         }
168                         
169                                         
170                         calc = validParameter.validFile(parameters, "calc", false);                     
171                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
172                         else { 
173                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
174                         }
175                         m->splitAtDash(calc, Estimators);
176                         if (m->inUsersGroups("citation", Estimators)) { 
177                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
178                                 //remove citation from list of calcs
179                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
180                         }
181                         
182                         groups = validParameter.validFile(parameters, "groups", false);                 
183                         if (groups == "not found") { groups = ""; }
184                         else { 
185                                 m->splitAtDash(groups, Groups);
186                                 m->setGroups(Groups);
187                         }
188                         
189                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
190                         all = m->isTrue(temp);
191                         
192                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
193                         createPhylip = m->isTrue(temp);
194                         
195             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
196                         m->mothurConvert(temp, iters); 
197             
198             output = validParameter.validFile(parameters, "output", false);             if(output == "not found"){      output = "lt"; }
199                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
200             
201             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
202                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
203             else {  
204                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
205                 else { subsample = false; }
206             }
207             
208             if (subsample == false) { iters = 1; }
209             
210                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
211                         m->setProcessors(temp);
212                         m->mothurConvert(temp, processors); 
213                         
214                         if (abort == false) {
215                         
216                                 ValidCalculators validCalculator;
217                                 int i;
218                                 
219                                 for (i=0; i<Estimators.size(); i++) {
220                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
221                                                 if (Estimators[i] == "sharedsobs") { 
222                                                         sumCalculators.push_back(new SharedSobsCS());
223                                                 }else if (Estimators[i] == "sharedchao") { 
224                                                         sumCalculators.push_back(new SharedChao1());
225                                                 }else if (Estimators[i] == "sharedace") { 
226                                                         sumCalculators.push_back(new SharedAce());
227                                                 }else if (Estimators[i] == "jabund") {  
228                                                         sumCalculators.push_back(new JAbund());
229                                                 }else if (Estimators[i] == "sorabund") { 
230                                                         sumCalculators.push_back(new SorAbund());
231                                                 }else if (Estimators[i] == "jclass") { 
232                                                         sumCalculators.push_back(new Jclass());
233                                                 }else if (Estimators[i] == "sorclass") { 
234                                                         sumCalculators.push_back(new SorClass());
235                                                 }else if (Estimators[i] == "jest") { 
236                                                         sumCalculators.push_back(new Jest());
237                                                 }else if (Estimators[i] == "sorest") { 
238                                                         sumCalculators.push_back(new SorEst());
239                                                 }else if (Estimators[i] == "thetayc") { 
240                                                         sumCalculators.push_back(new ThetaYC());
241                                                 }else if (Estimators[i] == "thetan") { 
242                                                         sumCalculators.push_back(new ThetaN());
243                                                 }else if (Estimators[i] == "kstest") { 
244                                                         sumCalculators.push_back(new KSTest());
245                                                 }else if (Estimators[i] == "sharednseqs") { 
246                                                         sumCalculators.push_back(new SharedNSeqs());
247                                                 }else if (Estimators[i] == "ochiai") { 
248                                                         sumCalculators.push_back(new Ochiai());
249                                                 }else if (Estimators[i] == "anderberg") { 
250                                                         sumCalculators.push_back(new Anderberg());
251                                                 }else if (Estimators[i] == "kulczynski") { 
252                                                         sumCalculators.push_back(new Kulczynski());
253                                                 }else if (Estimators[i] == "kulczynskicody") { 
254                                                         sumCalculators.push_back(new KulczynskiCody());
255                                                 }else if (Estimators[i] == "lennon") { 
256                                                         sumCalculators.push_back(new Lennon());
257                                                 }else if (Estimators[i] == "morisitahorn") { 
258                                                         sumCalculators.push_back(new MorHorn());
259                                                 }else if (Estimators[i] == "braycurtis") { 
260                                                         sumCalculators.push_back(new BrayCurtis());
261                                                 }else if (Estimators[i] == "whittaker") { 
262                                                         sumCalculators.push_back(new Whittaker());
263                                                 }else if (Estimators[i] == "odum") { 
264                                                         sumCalculators.push_back(new Odum());
265                                                 }else if (Estimators[i] == "canberra") { 
266                                                         sumCalculators.push_back(new Canberra());
267                                                 }else if (Estimators[i] == "structeuclidean") { 
268                                                         sumCalculators.push_back(new StructEuclidean());
269                                                 }else if (Estimators[i] == "structchord") { 
270                                                         sumCalculators.push_back(new StructChord());
271                                                 }else if (Estimators[i] == "hellinger") { 
272                                                         sumCalculators.push_back(new Hellinger());
273                                                 }else if (Estimators[i] == "manhattan") { 
274                                                         sumCalculators.push_back(new Manhattan());
275                                                 }else if (Estimators[i] == "structpearson") { 
276                                                         sumCalculators.push_back(new StructPearson());
277                                                 }else if (Estimators[i] == "soergel") { 
278                                                         sumCalculators.push_back(new Soergel());
279                                                 }else if (Estimators[i] == "spearman") { 
280                                                         sumCalculators.push_back(new Spearman());
281                                                 }else if (Estimators[i] == "structkulczynski") { 
282                                                         sumCalculators.push_back(new StructKulczynski());
283                                                 }else if (Estimators[i] == "speciesprofile") { 
284                                                         sumCalculators.push_back(new SpeciesProfile());
285                                                 }else if (Estimators[i] == "hamming") { 
286                                                         sumCalculators.push_back(new Hamming());
287                                                 }else if (Estimators[i] == "structchi2") { 
288                                                         sumCalculators.push_back(new StructChi2());
289                                                 }else if (Estimators[i] == "gower") { 
290                                                         sumCalculators.push_back(new Gower());
291                                                 }else if (Estimators[i] == "memchi2") { 
292                                                         sumCalculators.push_back(new MemChi2());
293                                                 }else if (Estimators[i] == "memchord") { 
294                                                         sumCalculators.push_back(new MemChord());
295                                                 }else if (Estimators[i] == "memeuclidean") { 
296                                                         sumCalculators.push_back(new MemEuclidean());
297                                                 }else if (Estimators[i] == "mempearson") { 
298                                                         sumCalculators.push_back(new MemPearson());
299                                                 }
300                                         }
301                                 }
302                                 
303                                 mult = false;
304                         }
305                 }
306         }
307         catch(exception& e) {
308                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
309                 exit(1);
310         }
311 }
312 //**********************************************************************************************************************
313
314 int SummarySharedCommand::execute(){
315         try {
316         
317                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
318                 
319                 ofstream outputFileHandle, outAll;
320                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
321                 
322                 //if the users entered no valid calculators don't execute command
323                 if (sumCalculators.size() == 0) { return 0; }
324                 //check if any calcs can do multiples
325                 else{
326                         if (all){ 
327                                 for (int i = 0; i < sumCalculators.size(); i++) {
328                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
329                                 }
330                         }
331                 }
332                         
333                 input = new InputData(sharedfile, "sharedfile");
334                 lookup = input->getSharedRAbundVectors();
335                 string lastLabel = lookup[0]->getLabel();
336         
337                 /******************************************************/
338                 //output headings for files
339                 /******************************************************/
340                 //output estimator names as column headers
341                 m->openOutputFile(outputFileName, outputFileHandle);
342                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
343                 for(int i=0;i<sumCalculators.size();i++){
344                         outputFileHandle << '\t' << sumCalculators[i]->getName();
345                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
346                 }
347                 outputFileHandle << endl;
348                 outputFileHandle.close();
349                 
350                 //create file and put column headers for multiple groups file
351                 string outAllFileName = ((m->getRootName(sharedfile)) + "multiple." + getOutputFileNameTag("summary"));
352                 if (mult == true) {
353                         m->openOutputFile(outAllFileName, outAll);
354                         outputNames.push_back(outAllFileName);
355                         
356                         outAll << "label" <<'\t' << "comparison" << '\t'; 
357                         for(int i=0;i<sumCalculators.size();i++){
358                                 if (sumCalculators[i]->getMultiple() == true) { 
359                                         outAll << '\t' << sumCalculators[i]->getName();
360                                 }
361                         }
362                         outAll << endl;
363                         outAll.close();
364                 }
365                 
366                 if (lookup.size() < 2) { 
367                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
368                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
369                         
370                         //close files and clean up
371                         m->mothurRemove(outputFileName);
372                         if (mult == true) { m->mothurRemove(outAllFileName);  }
373                         return 0;
374                 //if you only have 2 groups you don't need a .sharedmultiple file
375                 }else if ((lookup.size() == 2) && (mult == true)) { 
376                         mult = false;
377                         m->mothurRemove(outAllFileName);
378                         outputNames.pop_back();
379                 }
380                 
381                 if (m->control_pressed) {
382                         if (mult) {  m->mothurRemove(outAllFileName);  }
383                         m->mothurRemove(outputFileName); 
384                         delete input;
385                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
387                         m->clearGroups(); 
388                         return 0;
389                 }
390                 /******************************************************/
391         if (subsample) { 
392             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393                 subsampleSize = lookup[0]->getNumSeqs();
394                 for (int i = 1; i < lookup.size(); i++) {
395                     int thisSize = lookup[i]->getNumSeqs();
396                     
397                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
398                 }
399             }else {
400                 m->clearGroups();
401                 Groups.clear();
402                 vector<SharedRAbundVector*> temp;
403                 for (int i = 0; i < lookup.size(); i++) {
404                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
405                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
406                         delete lookup[i];
407                     }else { 
408                         Groups.push_back(lookup[i]->getGroup()); 
409                         temp.push_back(lookup[i]);
410                     }
411                 } 
412                 lookup = temp;
413                 m->setGroups(Groups);
414             }
415             
416             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
417         }
418
419                 
420                 /******************************************************/
421                 //comparison breakup to be used by different processes later
422                 numGroups = lookup.size();
423                 lines.resize(processors);
424                 for (int i = 0; i < processors; i++) {
425                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
427                 }               
428                 /******************************************************/
429                 
430                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431                 set<string> processedLabels;
432                 set<string> userLabels = labels;
433                         
434                 //as long as you are not at the end of the file or done wih the lines you want
435                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436                         if (m->control_pressed) {
437                                 if (mult) {  m->mothurRemove(outAllFileName);  }
438                                 m->mothurRemove(outputFileName); 
439                                 delete input; 
440                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
442                                 m->clearGroups(); 
443                                 return 0;
444                         }
445
446                 
447                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
448                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449                                 process(lookup, outputFileName, outAllFileName);
450                                 
451                                 processedLabels.insert(lookup[0]->getLabel());
452                                 userLabels.erase(lookup[0]->getLabel());
453                         }
454                         
455                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456                                         string saveLabel = lookup[0]->getLabel();
457                                         
458                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
459                                         lookup = input->getSharedRAbundVectors(lastLabel);
460
461                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462                                         process(lookup, outputFileName, outAllFileName);
463                                         
464                                         processedLabels.insert(lookup[0]->getLabel());
465                                         userLabels.erase(lookup[0]->getLabel());
466                                         
467                                         //restore real lastlabel to save below
468                                         lookup[0]->setLabel(saveLabel);
469                         }
470                         
471                         lastLabel = lookup[0]->getLabel();                      
472                                 
473                         //get next line to process
474                         //prevent memory leak
475                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
476                         lookup = input->getSharedRAbundVectors();
477                 }
478                 
479                 if (m->control_pressed) {
480                         if (mult) { m->mothurRemove(outAllFileName);  }
481                         m->mothurRemove(outputFileName); 
482                         delete input; 
483                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
484                         m->clearGroups(); 
485                         return 0;
486                 }
487
488                 //output error messages about any remaining user labels
489                 set<string>::iterator it;
490                 bool needToRun = false;
491                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
492                         m->mothurOut("Your file does not include the label " + *it); 
493                         if (processedLabels.count(lastLabel) != 1) {
494                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
495                                 needToRun = true;
496                         }else {
497                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
498                         }
499                 }
500                 
501                 //run last label if you need to
502                 if (needToRun == true)  {
503                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
504                                 lookup = input->getSharedRAbundVectors(lastLabel);
505
506                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507                                 process(lookup, outputFileName, outAllFileName);
508                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
509                 }
510                 
511                                 
512                 //reset groups parameter
513                 m->clearGroups();  
514                 
515                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
516                 delete input;  
517                 
518                 if (m->control_pressed) {
519                         m->mothurRemove(outAllFileName);  
520                         m->mothurRemove(outputFileName); 
521                         return 0;
522                 }
523                 
524                 m->mothurOutEndLine();
525                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
527                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
528                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
529                 m->mothurOutEndLine();
530
531                 return 0;
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "SummarySharedCommand", "execute");
535                 exit(1);
536         }
537 }
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
540         try {
541                 
542                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
543                 
544                 //output num seqs
545                 out << simMatrix.size() << endl;
546                 
547                 if (output == "lt") {
548                         for (int b = 0; b < simMatrix.size(); b++)      {
549                                 out << lookup[b]->getGroup() << '\t';
550                                 for (int n = 0; n < b; n++)     {
551                     if (m->control_pressed) { return 0; }
552                                         out << simMatrix[b][n] << '\t'; 
553                                 }
554                                 out << endl;
555                         }
556                 }else{
557                         for (int b = 0; b < simMatrix.size(); m++)      {
558                                 out << lookup[b]->getGroup() << '\t';
559                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
560                     if (m->control_pressed) { return 0; }
561                                         out << simMatrix[b][n] << '\t'; 
562                                 }
563                                 out << endl;
564                         }
565                 }
566         
567         return 0;
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "SummarySharedCommand", "printSims");
571                 exit(1);
572         }
573 }
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
576         try {
577         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
579         
580         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
581             
582             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
583             
584             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
585                 SubSample sample;
586                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
587                 
588                 //make copy of lookup so we don't get access violations
589                 vector<SharedRAbundVector*> newLookup;
590                 for (int k = 0; k < thisItersLookup.size(); k++) {
591                     SharedRAbundVector* temp = new SharedRAbundVector();
592                     temp->setLabel(thisItersLookup[k]->getLabel());
593                     temp->setGroup(thisItersLookup[k]->getGroup());
594                     newLookup.push_back(temp);
595                 }
596                 
597                 //for each bin
598                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
600                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
601                 }
602                 
603                 tempLabels = sample.getSample(newLookup, subsampleSize);
604                 thisItersLookup = newLookup;
605             }
606         
607             
608             if(processors == 1){
609                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610                 m->appendFiles((sumFileName + ".temp"), sumFileName);
611                 m->mothurRemove((sumFileName + ".temp"));
612                 if (mult) {
613                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614                     m->mothurRemove((sumAllFileName + ".temp"));
615                 }
616             }else{
617                 
618                 int process = 1;
619                 vector<int> processIDS;
620                 
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622                 //loop through and create all the processes you want
623                 while (process != processors) {
624                     int pid = fork();
625                     
626                     if (pid > 0) {
627                         processIDS.push_back(pid); 
628                         process++;
629                     }else if (pid == 0){
630                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
631                         
632                         //only do this if you want a distance file
633                         if (createPhylip) {
634                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
635                             ofstream outtemp;
636                             m->openOutputFile(tempdistFileName, outtemp);
637                             
638                             for (int i = 0; i < calcDists.size(); i++) {
639                                 outtemp << calcDists[i].size() << endl;
640                                 
641                                 for (int j = 0; j < calcDists[i].size(); j++) {
642                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
643                                 }
644                             }
645                             outtemp.close();
646                         }
647                         
648                         exit(0);
649                     }else { 
650                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
651                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
652                         exit(0);
653                     }
654                 }
655                 
656                 //parent do your part
657                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
658                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
661                 
662                 //force parent to wait until all the processes are done
663                 for (int i = 0; i < processIDS.size(); i++) {
664                     int temp = processIDS[i];
665                     wait(&temp);
666                 }
667                 
668                 for (int i = 0; i < processIDS.size(); i++) {
669                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
672                     
673                     if (createPhylip) {
674                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
675                         ifstream intemp;
676                         m->openInputFile(tempdistFileName, intemp);
677                         
678                         for (int k = 0; k < calcDists.size(); k++) {
679                             int size = 0;
680                             intemp >> size; m->gobble(intemp);
681                             
682                             for (int j = 0; j < size; j++) {
683                                 int seq1 = 0;
684                                 int seq2 = 0;
685                                 float dist = 1.0;
686                                 
687                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
688                                 
689                                 seqDist tempDist(seq1, seq2, dist);
690                                 calcDists[k].push_back(tempDist);
691                             }
692                         }
693                         intemp.close();
694                         m->mothurRemove(tempdistFileName);
695                     }
696                 }
697 #else
698                 //////////////////////////////////////////////////////////////////////////////////////////////////////
699                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
700                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
701                 //Taking advantage of shared memory to pass results vectors.
702                 //////////////////////////////////////////////////////////////////////////////////////////////////////
703                 
704                 vector<summarySharedData*> pDataArray; 
705                 DWORD   dwThreadIdArray[processors-1];
706                 HANDLE  hThreadArray[processors-1]; 
707                 
708                 //Create processor worker threads.
709                 for( int i=1; i<processors; i++ ){
710                     
711                     //make copy of lookup so we don't get access violations
712                     vector<SharedRAbundVector*> newLookup;
713                     for (int k = 0; k < thisLookup.size(); k++) {
714                         SharedRAbundVector* temp = new SharedRAbundVector();
715                         temp->setLabel(thisLookup[k]->getLabel());
716                         temp->setGroup(thisLookup[k]->getGroup());
717                         newLookup.push_back(temp);
718                     }
719                     
720                     //for each bin
721                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
722                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
723                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
724                     }
725                     
726                     // Allocate memory for thread data.
727                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
728                     pDataArray.push_back(tempSum);
729                     processIDS.push_back(i);
730                     
731                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
732                 }
733                 
734                 //parent do your part
735                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
736                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
737                 m->mothurRemove((sumFileName + "0.temp"));
738                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
739                 
740                 //Wait until all threads have terminated.
741                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
742                 
743                 //Close all thread handles and free memory allocations.
744                 for(int i=0; i < pDataArray.size(); i++){
745                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
746                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
747                     
748                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
749                     
750                     if (createPhylip) {
751                         for (int k = 0; k < calcDists.size(); k++) {
752                             int size = pDataArray[i]->calcDists[k].size();
753                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
754                         }
755                     }
756                     
757                     CloseHandle(hThreadArray[i]);
758                     delete pDataArray[i];
759                 }
760                 
761 #endif
762             }
763             
764             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
765                 
766                 calcDistsTotals.push_back(calcDists); 
767                 //clean up memory
768                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
769                 thisItersLookup.clear();
770             }else {
771                 if (createPhylip) {
772                     for (int i = 0; i < calcDists.size(); i++) {
773                         if (m->control_pressed) { break; }
774                         
775                         //initialize matrix
776                         vector< vector<double> > matrix; //square matrix to represent the distance
777                         matrix.resize(thisLookup.size());
778                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
779                         
780                         for (int j = 0; j < calcDists[i].size(); j++) {
781                             int row = calcDists[i][j].seq1;
782                             int column = calcDists[i][j].seq2;
783                             double dist = calcDists[i][j].dist;
784                             
785                             matrix[row][column] = dist;
786                             matrix[column][row] = dist;
787                         }
788                         
789                         string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + "." + getOutputFileNameTag("phylip");
790                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
791                         ofstream outDist;
792                         m->openOutputFile(distFileName, outDist);
793                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
794                         
795                         printSims(outDist, matrix);
796                         
797                         outDist.close();
798                     }
799                 }
800             }
801             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
802                 }
803
804         if (iters != 1) {
805             //we need to find the average distance and standard deviation for each groups distance
806             
807             vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
808             for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
809                 calcAverages[i].resize(calcDistsTotals[0][i].size());
810                 
811                 for (int j = 0; j < calcAverages[i].size(); j++) {
812                     calcAverages[i][j].seq1 = calcDists[i][j].seq1;
813                     calcAverages[i][j].seq2 = calcDists[i][j].seq2;
814                     calcAverages[i][j].dist = 0.0;
815                 }
816             }
817             
818             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
819                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
820                     for (int j = 0; j < calcAverages[i].size(); j++) {
821                         calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
822                     }
823                 }
824             }
825             
826             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
827                 for (int j = 0; j < calcAverages[i].size(); j++) {
828                     calcAverages[i][j].dist /= (float) iters;
829                 }
830             }
831             
832             //find standard deviation
833             vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
834             for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
835                 stdDev[i].resize(calcDistsTotals[0][i].size());
836                 
837                 for (int j = 0; j < stdDev[i].size(); j++) {
838                     stdDev[i][j].seq1 = calcDists[i][j].seq1;
839                     stdDev[i][j].seq2 = calcDists[i][j].seq2;
840                     stdDev[i][j].dist = 0.0;
841                 }
842             }
843             
844             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
845                 for (int i = 0; i < stdDev.size(); i++) {  
846                     for (int j = 0; j < stdDev[i].size(); j++) {
847                         stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
848                     }
849                 }
850             }
851             
852             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
853                 for (int j = 0; j < stdDev[i].size(); j++) {
854                     stdDev[i][j].dist /= (float) iters;
855                     stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
856                 }
857             }
858             
859             //print results
860             for (int i = 0; i < calcDists.size(); i++) {
861                 vector< vector<double> > matrix; //square matrix to represent the distance
862                 matrix.resize(thisLookup.size());
863                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
864                 
865                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
866                 stdmatrix.resize(thisLookup.size());
867                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
868                 
869                 
870                 for (int j = 0; j < calcAverages[i].size(); j++) {
871                     int row = calcAverages[i][j].seq1;
872                     int column = calcAverages[i][j].seq2;
873                     float dist = calcAverages[i][j].dist;
874                     float stdDist = stdDev[i][j].dist;
875                     
876                     matrix[row][column] = dist;
877                     matrix[column][row] = dist;
878                     stdmatrix[row][column] = stdDist;
879                     stdmatrix[column][row] = stdDist;
880                 }
881                 
882                 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".ave." + getOutputFileNameTag("phylip");
883                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
884                 ofstream outAve;
885                 m->openOutputFile(distFileName, outAve);
886                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
887                 
888                 printSims(outAve, matrix);
889                 
890                 outAve.close();
891                 
892                 distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".std." + getOutputFileNameTag("phylip");
893                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
894                 ofstream outSTD;
895                 m->openOutputFile(distFileName, outSTD);
896                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
897                 
898                 printSims(outSTD, stdmatrix);
899                 
900                 outSTD.close();
901                 
902             }
903         }
904         
905         return 0;
906         }
907         catch(exception& e) {
908                 m->errorOut(e, "SummarySharedCommand", "process");
909                 exit(1);
910         }
911 }
912 /**************************************************************************************************/
913 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
914         try {
915                 
916                 //loop through calculators and add to file all for all calcs that can do mutiple groups
917                 if (mult == true) {
918                         ofstream outAll;
919                         m->openOutputFile(sumAllFile, outAll);
920                         
921                         //output label
922                         outAll << thisLookup[0]->getLabel() << '\t';
923                         
924                         //output groups names
925                         string outNames = "";
926                         for (int j = 0; j < thisLookup.size(); j++) {
927                                 outNames += thisLookup[j]->getGroup() +  "-";
928                         }
929                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
930                         outAll << outNames << '\t';
931                         
932                         for(int i=0;i<sumCalculators.size();i++){
933                                 if (sumCalculators[i]->getMultiple() == true) { 
934                                         sumCalculators[i]->getValues(thisLookup);
935                                         
936                                         if (m->control_pressed) { outAll.close(); return 1; }
937                                         
938                                         outAll << '\t';
939                                         sumCalculators[i]->print(outAll);
940                                 }
941                         }
942                         outAll << endl;
943                         outAll.close();
944                 }
945                 
946                 ofstream outputFileHandle;
947                 m->openOutputFile(sumFile, outputFileHandle);
948                 
949                 vector<SharedRAbundVector*> subset;
950                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
951
952                         for (int l = 0; l < k; l++) {
953                                 
954                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
955                                 
956                                 subset.clear(); //clear out old pair of sharedrabunds
957                                 //add new pair of sharedrabunds
958                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
959                                 
960                                 //sort groups to be alphanumeric
961                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
962                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
963                                 }else{
964                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
965                                 }
966                                 
967                                 for(int i=0;i<sumCalculators.size();i++) {
968                                         
969                                         //if this calc needs all groups to calculate the pair load all groups
970                                         if (sumCalculators[i]->getNeedsAll()) { 
971                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
972                                                 for (int w = 0; w < thisLookup.size(); w++) {
973                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
974                                                 }
975                                         }
976                                         
977                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
978                                         
979                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
980                                         
981                                         outputFileHandle << '\t';
982                                         sumCalculators[i]->print(outputFileHandle);
983                                         
984                                         seqDist temp(l, k, tempdata[0]);
985                                         calcDists[i].push_back(temp);
986                                 }
987                                 outputFileHandle << endl;
988                         }
989                 }
990                 
991                 outputFileHandle.close();
992                 
993                 return 0;
994         }
995         catch(exception& e) {
996                 m->errorOut(e, "SummarySharedCommand", "driver");
997                 exit(1);
998         }
999 }
1000 /**************************************************************************************************/
1001
1002