2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 SummarySharedCommand::SummarySharedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["summary"] = tempOutNames;
75 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
79 //**********************************************************************************************************************
81 SummarySharedCommand::SummarySharedCommand(string option) {
83 abort = false; calledHelp = false;
86 //allow user to run help
87 if(option == "help") { help(); abort = true; calledHelp = true; }
88 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91 vector<string> myArray = setParameters();
93 OptionParser parser(option);
94 map<string, string> parameters = parser.getParameters();
95 map<string, string>::iterator it;
97 ValidParameters validParameter;
99 //check to make sure all parameters are valid for command
100 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 //initialize outputTypes
105 vector<string> tempOutNames;
106 outputTypes["summary"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("shared");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["shared"] = inputDir + it->second; }
123 sharedfile = validParameter.validFile(parameters, "shared", true);
124 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
125 else if (sharedfile == "not found") {
126 //if there is a current shared file, use it
127 sharedfile = m->getSharedFile();
128 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
129 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
130 }else { m->setSharedFile(sharedfile); }
133 //if the user changes the output directory command factory will send this info to us in the output parameter
134 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
137 //check for optional parameter and set defaults
138 // ...at some point should added some additional type checking...
139 label = validParameter.validFile(parameters, "label", false);
140 if (label == "not found") { label = ""; }
142 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
143 else { allLines = 1; }
147 calc = validParameter.validFile(parameters, "calc", false);
148 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
150 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
152 m->splitAtDash(calc, Estimators);
153 if (m->inUsersGroups("citation", Estimators)) {
154 ValidCalculators validCalc; validCalc.printCitations(Estimators);
155 //remove citation from list of calcs
156 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
159 groups = validParameter.validFile(parameters, "groups", false);
160 if (groups == "not found") { groups = ""; }
162 m->splitAtDash(groups, Groups);
163 m->setGroups(Groups);
166 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
167 all = m->isTrue(temp);
169 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
170 createPhylip = m->isTrue(temp);
172 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
173 m->mothurConvert(temp, iters);
175 output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
176 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
178 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
179 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
181 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
182 else { subsample = false; }
185 if (subsample == false) { iters = 1; }
187 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
188 m->setProcessors(temp);
189 m->mothurConvert(temp, processors);
191 if (abort == false) {
193 ValidCalculators validCalculator;
196 for (i=0; i<Estimators.size(); i++) {
197 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
198 if (Estimators[i] == "sharedsobs") {
199 sumCalculators.push_back(new SharedSobsCS());
200 }else if (Estimators[i] == "sharedchao") {
201 sumCalculators.push_back(new SharedChao1());
202 }else if (Estimators[i] == "sharedace") {
203 sumCalculators.push_back(new SharedAce());
204 }else if (Estimators[i] == "jabund") {
205 sumCalculators.push_back(new JAbund());
206 }else if (Estimators[i] == "sorabund") {
207 sumCalculators.push_back(new SorAbund());
208 }else if (Estimators[i] == "jclass") {
209 sumCalculators.push_back(new Jclass());
210 }else if (Estimators[i] == "sorclass") {
211 sumCalculators.push_back(new SorClass());
212 }else if (Estimators[i] == "jest") {
213 sumCalculators.push_back(new Jest());
214 }else if (Estimators[i] == "sorest") {
215 sumCalculators.push_back(new SorEst());
216 }else if (Estimators[i] == "thetayc") {
217 sumCalculators.push_back(new ThetaYC());
218 }else if (Estimators[i] == "thetan") {
219 sumCalculators.push_back(new ThetaN());
220 }else if (Estimators[i] == "kstest") {
221 sumCalculators.push_back(new KSTest());
222 }else if (Estimators[i] == "sharednseqs") {
223 sumCalculators.push_back(new SharedNSeqs());
224 }else if (Estimators[i] == "ochiai") {
225 sumCalculators.push_back(new Ochiai());
226 }else if (Estimators[i] == "anderberg") {
227 sumCalculators.push_back(new Anderberg());
228 }else if (Estimators[i] == "kulczynski") {
229 sumCalculators.push_back(new Kulczynski());
230 }else if (Estimators[i] == "kulczynskicody") {
231 sumCalculators.push_back(new KulczynskiCody());
232 }else if (Estimators[i] == "lennon") {
233 sumCalculators.push_back(new Lennon());
234 }else if (Estimators[i] == "morisitahorn") {
235 sumCalculators.push_back(new MorHorn());
236 }else if (Estimators[i] == "braycurtis") {
237 sumCalculators.push_back(new BrayCurtis());
238 }else if (Estimators[i] == "whittaker") {
239 sumCalculators.push_back(new Whittaker());
240 }else if (Estimators[i] == "odum") {
241 sumCalculators.push_back(new Odum());
242 }else if (Estimators[i] == "canberra") {
243 sumCalculators.push_back(new Canberra());
244 }else if (Estimators[i] == "structeuclidean") {
245 sumCalculators.push_back(new StructEuclidean());
246 }else if (Estimators[i] == "structchord") {
247 sumCalculators.push_back(new StructChord());
248 }else if (Estimators[i] == "hellinger") {
249 sumCalculators.push_back(new Hellinger());
250 }else if (Estimators[i] == "manhattan") {
251 sumCalculators.push_back(new Manhattan());
252 }else if (Estimators[i] == "structpearson") {
253 sumCalculators.push_back(new StructPearson());
254 }else if (Estimators[i] == "soergel") {
255 sumCalculators.push_back(new Soergel());
256 }else if (Estimators[i] == "spearman") {
257 sumCalculators.push_back(new Spearman());
258 }else if (Estimators[i] == "structkulczynski") {
259 sumCalculators.push_back(new StructKulczynski());
260 }else if (Estimators[i] == "speciesprofile") {
261 sumCalculators.push_back(new SpeciesProfile());
262 }else if (Estimators[i] == "hamming") {
263 sumCalculators.push_back(new Hamming());
264 }else if (Estimators[i] == "structchi2") {
265 sumCalculators.push_back(new StructChi2());
266 }else if (Estimators[i] == "gower") {
267 sumCalculators.push_back(new Gower());
268 }else if (Estimators[i] == "memchi2") {
269 sumCalculators.push_back(new MemChi2());
270 }else if (Estimators[i] == "memchord") {
271 sumCalculators.push_back(new MemChord());
272 }else if (Estimators[i] == "memeuclidean") {
273 sumCalculators.push_back(new MemEuclidean());
274 }else if (Estimators[i] == "mempearson") {
275 sumCalculators.push_back(new MemPearson());
284 catch(exception& e) {
285 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
289 //**********************************************************************************************************************
291 int SummarySharedCommand::execute(){
294 if (abort == true) { if (calledHelp) { return 0; } return 2; }
296 ofstream outputFileHandle, outAll;
297 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "shared.summary";
299 //if the users entered no valid calculators don't execute command
300 if (sumCalculators.size() == 0) { return 0; }
301 //check if any calcs can do multiples
304 for (int i = 0; i < sumCalculators.size(); i++) {
305 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
310 input = new InputData(sharedfile, "sharedfile");
311 lookup = input->getSharedRAbundVectors();
312 string lastLabel = lookup[0]->getLabel();
314 /******************************************************/
315 //output headings for files
316 /******************************************************/
317 //output estimator names as column headers
318 m->openOutputFile(outputFileName, outputFileHandle);
319 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
320 for(int i=0;i<sumCalculators.size();i++){
321 outputFileHandle << '\t' << sumCalculators[i]->getName();
322 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
324 outputFileHandle << endl;
325 outputFileHandle.close();
327 //create file and put column headers for multiple groups file
328 string outAllFileName = ((m->getRootName(sharedfile)) + "sharedmultiple.summary");
330 m->openOutputFile(outAllFileName, outAll);
331 outputNames.push_back(outAllFileName);
333 outAll << "label" <<'\t' << "comparison" << '\t';
334 for(int i=0;i<sumCalculators.size();i++){
335 if (sumCalculators[i]->getMultiple() == true) {
336 outAll << '\t' << sumCalculators[i]->getName();
343 if (lookup.size() < 2) {
344 m->mothurOut("I cannot run the command without at least 2 valid groups.");
345 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
347 //close files and clean up
348 m->mothurRemove(outputFileName);
349 if (mult == true) { m->mothurRemove(outAllFileName); }
351 //if you only have 2 groups you don't need a .sharedmultiple file
352 }else if ((lookup.size() == 2) && (mult == true)) {
354 m->mothurRemove(outAllFileName);
355 outputNames.pop_back();
358 if (m->control_pressed) {
359 if (mult) { m->mothurRemove(outAllFileName); }
360 m->mothurRemove(outputFileName);
362 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
363 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
367 /******************************************************/
369 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
370 subsampleSize = lookup[0]->getNumSeqs();
371 for (int i = 1; i < lookup.size(); i++) {
372 int thisSize = lookup[i]->getNumSeqs();
374 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
379 vector<SharedRAbundVector*> temp;
380 for (int i = 0; i < lookup.size(); i++) {
381 if (lookup[i]->getNumSeqs() < subsampleSize) {
382 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
385 Groups.push_back(lookup[i]->getGroup());
386 temp.push_back(lookup[i]);
390 m->setGroups(Groups);
393 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
397 /******************************************************/
398 //comparison breakup to be used by different processes later
399 numGroups = lookup.size();
400 lines.resize(processors);
401 for (int i = 0; i < processors; i++) {
402 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
403 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
405 /******************************************************/
407 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
408 set<string> processedLabels;
409 set<string> userLabels = labels;
411 //as long as you are not at the end of the file or done wih the lines you want
412 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
413 if (m->control_pressed) {
414 if (mult) { m->mothurRemove(outAllFileName); }
415 m->mothurRemove(outputFileName);
417 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
418 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
424 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
425 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
426 process(lookup, outputFileName, outAllFileName);
428 processedLabels.insert(lookup[0]->getLabel());
429 userLabels.erase(lookup[0]->getLabel());
432 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
433 string saveLabel = lookup[0]->getLabel();
435 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
436 lookup = input->getSharedRAbundVectors(lastLabel);
438 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
439 process(lookup, outputFileName, outAllFileName);
441 processedLabels.insert(lookup[0]->getLabel());
442 userLabels.erase(lookup[0]->getLabel());
444 //restore real lastlabel to save below
445 lookup[0]->setLabel(saveLabel);
448 lastLabel = lookup[0]->getLabel();
450 //get next line to process
451 //prevent memory leak
452 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
453 lookup = input->getSharedRAbundVectors();
456 if (m->control_pressed) {
457 if (mult) { m->mothurRemove(outAllFileName); }
458 m->mothurRemove(outputFileName);
460 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
465 //output error messages about any remaining user labels
466 set<string>::iterator it;
467 bool needToRun = false;
468 for (it = userLabels.begin(); it != userLabels.end(); it++) {
469 m->mothurOut("Your file does not include the label " + *it);
470 if (processedLabels.count(lastLabel) != 1) {
471 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
474 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
478 //run last label if you need to
479 if (needToRun == true) {
480 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
481 lookup = input->getSharedRAbundVectors(lastLabel);
483 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
484 process(lookup, outputFileName, outAllFileName);
485 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
489 //reset groups parameter
492 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
495 if (m->control_pressed) {
496 m->mothurRemove(outAllFileName);
497 m->mothurRemove(outputFileName);
501 m->mothurOutEndLine();
502 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
503 m->mothurOut(outputFileName); m->mothurOutEndLine();
504 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
505 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
506 m->mothurOutEndLine();
510 catch(exception& e) {
511 m->errorOut(e, "SummarySharedCommand", "execute");
515 /***********************************************************/
516 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
519 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
522 out << simMatrix.size() << endl;
524 if (output == "lt") {
525 for (int b = 0; b < simMatrix.size(); b++) {
526 out << lookup[b]->getGroup() << '\t';
527 for (int n = 0; n < b; n++) {
528 if (m->control_pressed) { return 0; }
529 out << simMatrix[b][n] << '\t';
534 for (int b = 0; b < simMatrix.size(); m++) {
535 out << lookup[b]->getGroup() << '\t';
536 for (int n = 0; n < simMatrix[b].size(); n++) {
537 if (m->control_pressed) { return 0; }
538 out << simMatrix[b][n] << '\t';
546 catch(exception& e) {
547 m->errorOut(e, "SummarySharedCommand", "printSims");
551 /***********************************************************/
552 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
554 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
555 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
557 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
559 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
561 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
563 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
565 //make copy of lookup so we don't get access violations
566 vector<SharedRAbundVector*> newLookup;
567 for (int k = 0; k < thisItersLookup.size(); k++) {
568 SharedRAbundVector* temp = new SharedRAbundVector();
569 temp->setLabel(thisItersLookup[k]->getLabel());
570 temp->setGroup(thisItersLookup[k]->getGroup());
571 newLookup.push_back(temp);
575 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
576 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
577 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
580 tempLabels = sample.getSample(newLookup, subsampleSize);
581 thisItersLookup = newLookup;
586 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
587 m->appendFiles((sumFileName + ".temp"), sumFileName);
588 m->mothurRemove((sumFileName + ".temp"));
590 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
591 m->mothurRemove((sumAllFileName + ".temp"));
596 vector<int> processIDS;
598 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
599 //loop through and create all the processes you want
600 while (process != processors) {
604 processIDS.push_back(pid);
607 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
609 //only do this if you want a distance file
611 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
613 m->openOutputFile(tempdistFileName, outtemp);
615 for (int i = 0; i < calcDists.size(); i++) {
616 outtemp << calcDists[i].size() << endl;
618 for (int j = 0; j < calcDists[i].size(); j++) {
619 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
627 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
628 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
633 //parent do your part
634 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
635 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
636 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
637 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
639 //force parent to wait until all the processes are done
640 for (int i = 0; i < processIDS.size(); i++) {
641 int temp = processIDS[i];
645 for (int i = 0; i < processIDS.size(); i++) {
646 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
647 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
648 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
651 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
653 m->openInputFile(tempdistFileName, intemp);
655 for (int k = 0; k < calcDists.size(); k++) {
657 intemp >> size; m->gobble(intemp);
659 for (int j = 0; j < size; j++) {
664 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
666 seqDist tempDist(seq1, seq2, dist);
667 calcDists[k].push_back(tempDist);
671 m->mothurRemove(tempdistFileName);
675 //////////////////////////////////////////////////////////////////////////////////////////////////////
676 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
677 //Above fork() will clone, so memory is separate, but that's not the case with windows,
678 //Taking advantage of shared memory to pass results vectors.
679 //////////////////////////////////////////////////////////////////////////////////////////////////////
681 vector<summarySharedData*> pDataArray;
682 DWORD dwThreadIdArray[processors-1];
683 HANDLE hThreadArray[processors-1];
685 //Create processor worker threads.
686 for( int i=1; i<processors; i++ ){
688 //make copy of lookup so we don't get access violations
689 vector<SharedRAbundVector*> newLookup;
690 for (int k = 0; k < thisLookup.size(); k++) {
691 SharedRAbundVector* temp = new SharedRAbundVector();
692 temp->setLabel(thisLookup[k]->getLabel());
693 temp->setGroup(thisLookup[k]->getGroup());
694 newLookup.push_back(temp);
698 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
699 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
700 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
703 // Allocate memory for thread data.
704 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
705 pDataArray.push_back(tempSum);
706 processIDS.push_back(i);
708 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
711 //parent do your part
712 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
713 m->appendFiles((sumFileName + "0.temp"), sumFileName);
714 m->mothurRemove((sumFileName + "0.temp"));
715 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
717 //Wait until all threads have terminated.
718 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
720 //Close all thread handles and free memory allocations.
721 for(int i=0; i < pDataArray.size(); i++){
722 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
723 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
725 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
728 for (int k = 0; k < calcDists.size(); k++) {
729 int size = pDataArray[i]->calcDists[k].size();
730 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
734 CloseHandle(hThreadArray[i]);
735 delete pDataArray[i];
741 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
743 calcDistsTotals.push_back(calcDists);
745 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
746 thisItersLookup.clear();
747 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
750 for (int i = 0; i < calcDists.size(); i++) {
751 if (m->control_pressed) { break; }
754 vector< vector<double> > matrix; //square matrix to represent the distance
755 matrix.resize(thisLookup.size());
756 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
758 for (int j = 0; j < calcDists[i].size(); j++) {
759 int row = calcDists[i][j].seq1;
760 int column = calcDists[i][j].seq2;
761 double dist = calcDists[i][j].dist;
763 matrix[row][column] = dist;
764 matrix[column][row] = dist;
767 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
768 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
770 m->openOutputFile(distFileName, outDist);
771 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
773 printSims(outDist, matrix);
782 //we need to find the average distance and standard deviation for each groups distance
784 vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
785 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
786 calcAverages[i].resize(calcDistsTotals[0][i].size());
788 for (int j = 0; j < calcAverages[i].size(); j++) {
789 calcAverages[i][j].seq1 = calcDists[i][j].seq1;
790 calcAverages[i][j].seq2 = calcDists[i][j].seq2;
791 calcAverages[i][j].dist = 0.0;
795 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
796 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
797 for (int j = 0; j < calcAverages[i].size(); j++) {
798 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
803 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
804 for (int j = 0; j < calcAverages[i].size(); j++) {
805 calcAverages[i][j].dist /= (float) iters;
809 //find standard deviation
810 vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
811 for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
812 stdDev[i].resize(calcDistsTotals[0][i].size());
814 for (int j = 0; j < stdDev[i].size(); j++) {
815 stdDev[i][j].seq1 = calcDists[i][j].seq1;
816 stdDev[i][j].seq2 = calcDists[i][j].seq2;
817 stdDev[i][j].dist = 0.0;
821 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
822 for (int i = 0; i < stdDev.size(); i++) {
823 for (int j = 0; j < stdDev[i].size(); j++) {
824 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
829 for (int i = 0; i < stdDev.size(); i++) { //finds average.
830 for (int j = 0; j < stdDev[i].size(); j++) {
831 stdDev[i][j].dist /= (float) iters;
832 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
837 for (int i = 0; i < calcDists.size(); i++) {
838 vector< vector<double> > matrix; //square matrix to represent the distance
839 matrix.resize(thisLookup.size());
840 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
842 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
843 stdmatrix.resize(thisLookup.size());
844 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
847 for (int j = 0; j < calcAverages[i].size(); j++) {
848 int row = calcAverages[i][j].seq1;
849 int column = calcAverages[i][j].seq2;
850 float dist = calcAverages[i][j].dist;
851 float stdDist = stdDev[i][j].dist;
853 matrix[row][column] = dist;
854 matrix[column][row] = dist;
855 stdmatrix[row][column] = stdDist;
856 stdmatrix[column][row] = stdDist;
859 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave.dist";
860 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
862 m->openOutputFile(distFileName, outAve);
863 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
865 printSims(outAve, matrix);
869 distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std.dist";
870 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
872 m->openOutputFile(distFileName, outSTD);
873 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
875 printSims(outSTD, stdmatrix);
884 catch(exception& e) {
885 m->errorOut(e, "SummarySharedCommand", "process");
889 /**************************************************************************************************/
890 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
893 //loop through calculators and add to file all for all calcs that can do mutiple groups
896 m->openOutputFile(sumAllFile, outAll);
899 outAll << thisLookup[0]->getLabel() << '\t';
901 //output groups names
902 string outNames = "";
903 for (int j = 0; j < thisLookup.size(); j++) {
904 outNames += thisLookup[j]->getGroup() + "-";
906 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
907 outAll << outNames << '\t';
909 for(int i=0;i<sumCalculators.size();i++){
910 if (sumCalculators[i]->getMultiple() == true) {
911 sumCalculators[i]->getValues(thisLookup);
913 if (m->control_pressed) { outAll.close(); return 1; }
916 sumCalculators[i]->print(outAll);
923 ofstream outputFileHandle;
924 m->openOutputFile(sumFile, outputFileHandle);
926 vector<SharedRAbundVector*> subset;
927 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
929 for (int l = 0; l < k; l++) {
931 outputFileHandle << thisLookup[0]->getLabel() << '\t';
933 subset.clear(); //clear out old pair of sharedrabunds
934 //add new pair of sharedrabunds
935 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
937 //sort groups to be alphanumeric
938 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
939 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
941 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
944 for(int i=0;i<sumCalculators.size();i++) {
946 //if this calc needs all groups to calculate the pair load all groups
947 if (sumCalculators[i]->getNeedsAll()) {
948 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
949 for (int w = 0; w < thisLookup.size(); w++) {
950 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
954 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
956 if (m->control_pressed) { outputFileHandle.close(); return 1; }
958 outputFileHandle << '\t';
959 sumCalculators[i]->print(outputFileHandle);
961 seqDist temp(l, k, tempdata[0]);
962 calcDists[i].push_back(temp);
964 outputFileHandle << endl;
968 outputFileHandle.close();
972 catch(exception& e) {
973 m->errorOut(e, "SummarySharedCommand", "driver");
977 /**************************************************************************************************/