2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
71 if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; }
72 else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["summary"] = tempOutNames;
89 outputTypes["phylip"] = tempOutNames;
92 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
96 //**********************************************************************************************************************
98 SummarySharedCommand::SummarySharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();
110 OptionParser parser(option);
111 map<string, string> parameters = parser.getParameters();
112 map<string, string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["summary"] = tempOutNames;
124 outputTypes["phylip"] = tempOutNames;
126 //if the user changes the input directory command factory will send this info to us in the output parameter
127 string inputDir = validParameter.validFile(parameters, "inputdir", false);
128 if (inputDir == "not found"){ inputDir = ""; }
131 it = parameters.find("shared");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 sharedfile = validParameter.validFile(parameters, "shared", true);
142 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
143 else if (sharedfile == "not found") {
144 //if there is a current shared file, use it
145 sharedfile = m->getSharedFile();
146 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setSharedFile(sharedfile); }
151 //if the user changes the output directory command factory will send this info to us in the output parameter
152 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
165 calc = validParameter.validFile(parameters, "calc", false);
166 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
168 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
170 m->splitAtDash(calc, Estimators);
171 if (m->inUsersGroups("citation", Estimators)) {
172 ValidCalculators validCalc; validCalc.printCitations(Estimators);
173 //remove citation from list of calcs
174 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
177 groups = validParameter.validFile(parameters, "groups", false);
178 if (groups == "not found") { groups = ""; }
180 m->splitAtDash(groups, Groups);
181 m->setGroups(Groups);
184 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
185 all = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
188 createPhylip = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
191 m->mothurConvert(temp, iters);
193 output = validParameter.validFile(parameters, "output", false);
194 if(output == "not found"){ output = "lt"; }
195 else { createPhylip = true; }
196 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
198 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
199 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
201 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
202 else { subsample = false; }
205 if (subsample == false) { iters = 0; }
207 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
208 m->setProcessors(temp);
209 m->mothurConvert(temp, processors);
211 if (abort == false) {
213 ValidCalculators validCalculator;
216 for (i=0; i<Estimators.size(); i++) {
217 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
218 if (Estimators[i] == "sharedsobs") {
219 sumCalculators.push_back(new SharedSobsCS());
220 }else if (Estimators[i] == "sharedchao") {
221 sumCalculators.push_back(new SharedChao1());
222 }else if (Estimators[i] == "sharedace") {
223 sumCalculators.push_back(new SharedAce());
224 }else if (Estimators[i] == "jabund") {
225 sumCalculators.push_back(new JAbund());
226 }else if (Estimators[i] == "sorabund") {
227 sumCalculators.push_back(new SorAbund());
228 }else if (Estimators[i] == "jclass") {
229 sumCalculators.push_back(new Jclass());
230 }else if (Estimators[i] == "sorclass") {
231 sumCalculators.push_back(new SorClass());
232 }else if (Estimators[i] == "jest") {
233 sumCalculators.push_back(new Jest());
234 }else if (Estimators[i] == "sorest") {
235 sumCalculators.push_back(new SorEst());
236 }else if (Estimators[i] == "thetayc") {
237 sumCalculators.push_back(new ThetaYC());
238 }else if (Estimators[i] == "thetan") {
239 sumCalculators.push_back(new ThetaN());
240 }else if (Estimators[i] == "kstest") {
241 sumCalculators.push_back(new KSTest());
242 }else if (Estimators[i] == "sharednseqs") {
243 sumCalculators.push_back(new SharedNSeqs());
244 }else if (Estimators[i] == "ochiai") {
245 sumCalculators.push_back(new Ochiai());
246 }else if (Estimators[i] == "anderberg") {
247 sumCalculators.push_back(new Anderberg());
248 }else if (Estimators[i] == "kulczynski") {
249 sumCalculators.push_back(new Kulczynski());
250 }else if (Estimators[i] == "kulczynskicody") {
251 sumCalculators.push_back(new KulczynskiCody());
252 }else if (Estimators[i] == "lennon") {
253 sumCalculators.push_back(new Lennon());
254 }else if (Estimators[i] == "morisitahorn") {
255 sumCalculators.push_back(new MorHorn());
256 }else if (Estimators[i] == "braycurtis") {
257 sumCalculators.push_back(new BrayCurtis());
258 }else if (Estimators[i] == "whittaker") {
259 sumCalculators.push_back(new Whittaker());
260 }else if (Estimators[i] == "odum") {
261 sumCalculators.push_back(new Odum());
262 }else if (Estimators[i] == "canberra") {
263 sumCalculators.push_back(new Canberra());
264 }else if (Estimators[i] == "structeuclidean") {
265 sumCalculators.push_back(new StructEuclidean());
266 }else if (Estimators[i] == "structchord") {
267 sumCalculators.push_back(new StructChord());
268 }else if (Estimators[i] == "hellinger") {
269 sumCalculators.push_back(new Hellinger());
270 }else if (Estimators[i] == "manhattan") {
271 sumCalculators.push_back(new Manhattan());
272 }else if (Estimators[i] == "structpearson") {
273 sumCalculators.push_back(new StructPearson());
274 }else if (Estimators[i] == "soergel") {
275 sumCalculators.push_back(new Soergel());
276 }else if (Estimators[i] == "spearman") {
277 sumCalculators.push_back(new Spearman());
278 }else if (Estimators[i] == "structkulczynski") {
279 sumCalculators.push_back(new StructKulczynski());
280 }else if (Estimators[i] == "speciesprofile") {
281 sumCalculators.push_back(new SpeciesProfile());
282 }else if (Estimators[i] == "hamming") {
283 sumCalculators.push_back(new Hamming());
284 }else if (Estimators[i] == "structchi2") {
285 sumCalculators.push_back(new StructChi2());
286 }else if (Estimators[i] == "gower") {
287 sumCalculators.push_back(new Gower());
288 }else if (Estimators[i] == "memchi2") {
289 sumCalculators.push_back(new MemChi2());
290 }else if (Estimators[i] == "memchord") {
291 sumCalculators.push_back(new MemChord());
292 }else if (Estimators[i] == "memeuclidean") {
293 sumCalculators.push_back(new MemEuclidean());
294 }else if (Estimators[i] == "mempearson") {
295 sumCalculators.push_back(new MemPearson());
304 catch(exception& e) {
305 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
309 //**********************************************************************************************************************
311 int SummarySharedCommand::execute(){
314 if (abort == true) { if (calledHelp) { return 0; } return 2; }
316 ofstream outputFileHandle, outAll;
317 map<string, string> variables;
318 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
319 string outputFileName = getOutputFileName("summary",variables);
321 //if the users entered no valid calculators don't execute command
322 if (sumCalculators.size() == 0) { return 0; }
323 //check if any calcs can do multiples
326 for (int i = 0; i < sumCalculators.size(); i++) {
327 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
332 input = new InputData(sharedfile, "sharedfile");
333 lookup = input->getSharedRAbundVectors();
334 string lastLabel = lookup[0]->getLabel();
336 /******************************************************/
337 //output headings for files
338 /******************************************************/
339 //output estimator names as column headers
340 m->openOutputFile(outputFileName, outputFileHandle);
341 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
342 for(int i=0;i<sumCalculators.size();i++){
343 outputFileHandle << '\t' << sumCalculators[i]->getName();
344 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
346 outputFileHandle << endl;
347 outputFileHandle.close();
349 //create file and put column headers for multiple groups file
350 variables["[tag]"]= "multiple";
351 string outAllFileName = getOutputFileName("summary",variables);
353 m->openOutputFile(outAllFileName, outAll);
354 outputNames.push_back(outAllFileName);
356 outAll << "label" <<'\t' << "comparison" << '\t';
357 for(int i=0;i<sumCalculators.size();i++){
358 if (sumCalculators[i]->getMultiple() == true) {
359 outAll << '\t' << sumCalculators[i]->getName();
366 if (lookup.size() < 2) {
367 m->mothurOut("I cannot run the command without at least 2 valid groups.");
368 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
370 //close files and clean up
371 m->mothurRemove(outputFileName);
372 if (mult == true) { m->mothurRemove(outAllFileName); }
374 //if you only have 2 groups you don't need a .sharedmultiple file
375 }else if ((lookup.size() == 2) && (mult == true)) {
377 m->mothurRemove(outAllFileName);
378 outputNames.pop_back();
381 if (m->control_pressed) {
382 if (mult) { m->mothurRemove(outAllFileName); }
383 m->mothurRemove(outputFileName);
385 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
390 /******************************************************/
392 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393 subsampleSize = lookup[0]->getNumSeqs();
394 for (int i = 1; i < lookup.size(); i++) {
395 int thisSize = lookup[i]->getNumSeqs();
397 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
402 vector<SharedRAbundVector*> temp;
403 for (int i = 0; i < lookup.size(); i++) {
404 if (lookup[i]->getNumSeqs() < subsampleSize) {
405 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
408 Groups.push_back(lookup[i]->getGroup());
409 temp.push_back(lookup[i]);
413 m->setGroups(Groups);
416 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
420 /******************************************************/
421 //comparison breakup to be used by different processes later
422 numGroups = lookup.size();
423 lines.resize(processors);
424 for (int i = 0; i < processors; i++) {
425 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
428 /******************************************************/
430 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431 set<string> processedLabels;
432 set<string> userLabels = labels;
434 //as long as you are not at the end of the file or done wih the lines you want
435 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436 if (m->control_pressed) {
437 if (mult) { m->mothurRemove(outAllFileName); }
438 m->mothurRemove(outputFileName);
440 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
447 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
448 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449 process(lookup, outputFileName, outAllFileName);
451 processedLabels.insert(lookup[0]->getLabel());
452 userLabels.erase(lookup[0]->getLabel());
455 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456 string saveLabel = lookup[0]->getLabel();
458 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
459 lookup = input->getSharedRAbundVectors(lastLabel);
461 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462 process(lookup, outputFileName, outAllFileName);
464 processedLabels.insert(lookup[0]->getLabel());
465 userLabels.erase(lookup[0]->getLabel());
467 //restore real lastlabel to save below
468 lookup[0]->setLabel(saveLabel);
471 lastLabel = lookup[0]->getLabel();
473 //get next line to process
474 //prevent memory leak
475 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
476 lookup = input->getSharedRAbundVectors();
479 if (m->control_pressed) {
480 if (mult) { m->mothurRemove(outAllFileName); }
481 m->mothurRemove(outputFileName);
483 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
488 //output error messages about any remaining user labels
489 set<string>::iterator it;
490 bool needToRun = false;
491 for (it = userLabels.begin(); it != userLabels.end(); it++) {
492 m->mothurOut("Your file does not include the label " + *it);
493 if (processedLabels.count(lastLabel) != 1) {
494 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
497 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
501 //run last label if you need to
502 if (needToRun == true) {
503 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
504 lookup = input->getSharedRAbundVectors(lastLabel);
506 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507 process(lookup, outputFileName, outAllFileName);
508 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
512 //reset groups parameter
515 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
518 if (m->control_pressed) {
519 m->mothurRemove(outAllFileName);
520 m->mothurRemove(outputFileName);
524 m->mothurOutEndLine();
525 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526 m->mothurOut(outputFileName); m->mothurOutEndLine();
527 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
528 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
529 m->mothurOutEndLine();
533 catch(exception& e) {
534 m->errorOut(e, "SummarySharedCommand", "execute");
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
542 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
545 out << simMatrix.size() << endl;
547 if (output == "lt") {
548 for (int b = 0; b < simMatrix.size(); b++) {
549 out << lookup[b]->getGroup() << '\t';
550 for (int n = 0; n < b; n++) {
551 if (m->control_pressed) { return 0; }
552 out << simMatrix[b][n] << '\t';
557 for (int b = 0; b < simMatrix.size(); m++) {
558 out << lookup[b]->getGroup() << '\t';
559 for (int n = 0; n < simMatrix[b].size(); n++) {
560 if (m->control_pressed) { return 0; }
561 out << simMatrix[b][n] << '\t';
569 catch(exception& e) {
570 m->errorOut(e, "SummarySharedCommand", "printSims");
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
577 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
580 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
582 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
584 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
586 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
588 //make copy of lookup so we don't get access violations
589 vector<SharedRAbundVector*> newLookup;
590 for (int k = 0; k < thisItersLookup.size(); k++) {
591 SharedRAbundVector* temp = new SharedRAbundVector();
592 temp->setLabel(thisItersLookup[k]->getLabel());
593 temp->setGroup(thisItersLookup[k]->getGroup());
594 newLookup.push_back(temp);
598 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
600 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
603 tempLabels = sample.getSample(newLookup, subsampleSize);
604 thisItersLookup = newLookup;
609 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610 m->appendFiles((sumFileName + ".temp"), sumFileName);
611 m->mothurRemove((sumFileName + ".temp"));
613 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614 m->mothurRemove((sumAllFileName + ".temp"));
619 vector<int> processIDS;
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622 //loop through and create all the processes you want
623 while (process != processors) {
627 processIDS.push_back(pid);
630 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
632 //only do this if you want a distance file
634 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
636 m->openOutputFile(tempdistFileName, outtemp);
638 for (int i = 0; i < calcDists.size(); i++) {
639 outtemp << calcDists[i].size() << endl;
641 for (int j = 0; j < calcDists[i].size(); j++) {
642 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
650 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
651 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
656 //parent do your part
657 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
658 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
662 //force parent to wait until all the processes are done
663 for (int i = 0; i < processIDS.size(); i++) {
664 int temp = processIDS[i];
668 for (int i = 0; i < processIDS.size(); i++) {
669 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
674 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
676 m->openInputFile(tempdistFileName, intemp);
678 for (int k = 0; k < calcDists.size(); k++) {
680 intemp >> size; m->gobble(intemp);
682 for (int j = 0; j < size; j++) {
687 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
689 seqDist tempDist(seq1, seq2, dist);
690 calcDists[k].push_back(tempDist);
694 m->mothurRemove(tempdistFileName);
698 //////////////////////////////////////////////////////////////////////////////////////////////////////
699 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
700 //Above fork() will clone, so memory is separate, but that's not the case with windows,
701 //Taking advantage of shared memory to pass results vectors.
702 //////////////////////////////////////////////////////////////////////////////////////////////////////
704 vector<summarySharedData*> pDataArray;
705 DWORD dwThreadIdArray[processors-1];
706 HANDLE hThreadArray[processors-1];
708 //Create processor worker threads.
709 for( int i=1; i<processors; i++ ){
711 //make copy of lookup so we don't get access violations
712 vector<SharedRAbundVector*> newLookup;
713 for (int k = 0; k < thisLookup.size(); k++) {
714 SharedRAbundVector* temp = new SharedRAbundVector();
715 temp->setLabel(thisLookup[k]->getLabel());
716 temp->setGroup(thisLookup[k]->getGroup());
717 newLookup.push_back(temp);
721 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
722 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
723 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
726 // Allocate memory for thread data.
727 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
728 pDataArray.push_back(tempSum);
729 processIDS.push_back(i);
731 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
734 //parent do your part
735 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
736 m->appendFiles((sumFileName + "0.temp"), sumFileName);
737 m->mothurRemove((sumFileName + "0.temp"));
738 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
740 //Wait until all threads have terminated.
741 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
743 //Close all thread handles and free memory allocations.
744 for(int i=0; i < pDataArray.size(); i++){
745 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
746 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
748 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
751 for (int k = 0; k < calcDists.size(); k++) {
752 int size = pDataArray[i]->calcDists[k].size();
753 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
757 CloseHandle(hThreadArray[i]);
758 delete pDataArray[i];
764 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
766 calcDistsTotals.push_back(calcDists);
768 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
769 thisItersLookup.clear();
772 for (int i = 0; i < calcDists.size(); i++) {
773 if (m->control_pressed) { break; }
776 vector< vector<double> > matrix; //square matrix to represent the distance
777 matrix.resize(thisLookup.size());
778 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
780 for (int j = 0; j < calcDists[i].size(); j++) {
781 int row = calcDists[i][j].seq1;
782 int column = calcDists[i][j].seq2;
783 double dist = calcDists[i][j].dist;
785 matrix[row][column] = dist;
786 matrix[column][row] = dist;
789 map<string, string> variables;
790 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
791 variables["[calc]"] = sumCalculators[i]->getName();
792 variables["[distance]"] = thisLookup[0]->getLabel();
793 variables["[outputtag]"] = output;
794 variables["[tag2]"] = "";
795 string distFileName = getOutputFileName("phylip",variables);
796 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
798 m->openOutputFile(distFileName, outDist);
799 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
801 printSims(outDist, matrix);
807 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
811 //we need to find the average distance and standard deviation for each groups distance
813 vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
814 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
815 calcAverages[i].resize(calcDistsTotals[0][i].size());
817 for (int j = 0; j < calcAverages[i].size(); j++) {
818 calcAverages[i][j].seq1 = calcDists[i][j].seq1;
819 calcAverages[i][j].seq2 = calcDists[i][j].seq2;
820 calcAverages[i][j].dist = 0.0;
824 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
825 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
826 for (int j = 0; j < calcAverages[i].size(); j++) {
827 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
832 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
833 for (int j = 0; j < calcAverages[i].size(); j++) {
834 calcAverages[i][j].dist /= (float) iters;
838 //find standard deviation
839 vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
840 for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
841 stdDev[i].resize(calcDistsTotals[0][i].size());
843 for (int j = 0; j < stdDev[i].size(); j++) {
844 stdDev[i][j].seq1 = calcDists[i][j].seq1;
845 stdDev[i][j].seq2 = calcDists[i][j].seq2;
846 stdDev[i][j].dist = 0.0;
850 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
851 for (int i = 0; i < stdDev.size(); i++) {
852 for (int j = 0; j < stdDev[i].size(); j++) {
853 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
858 for (int i = 0; i < stdDev.size(); i++) { //finds average.
859 for (int j = 0; j < stdDev[i].size(); j++) {
860 stdDev[i][j].dist /= (float) iters;
861 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
866 for (int i = 0; i < calcDists.size(); i++) {
867 vector< vector<double> > matrix; //square matrix to represent the distance
868 matrix.resize(thisLookup.size());
869 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
871 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
872 stdmatrix.resize(thisLookup.size());
873 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
876 for (int j = 0; j < calcAverages[i].size(); j++) {
877 int row = calcAverages[i][j].seq1;
878 int column = calcAverages[i][j].seq2;
879 float dist = calcAverages[i][j].dist;
880 float stdDist = stdDev[i][j].dist;
882 matrix[row][column] = dist;
883 matrix[column][row] = dist;
884 stdmatrix[row][column] = stdDist;
885 stdmatrix[column][row] = stdDist;
888 map<string, string> variables;
889 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
890 variables["[calc]"] = sumCalculators[i]->getName();
891 variables["[distance]"] = thisLookup[0]->getLabel();
892 variables["[outputtag]"] = output;
893 variables["[tag2]"] = "ave";
894 string distFileName = getOutputFileName("phylip",variables);
895 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
897 m->openOutputFile(distFileName, outAve);
898 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
900 printSims(outAve, matrix);
904 variables["[tag2]"] = "std";
905 distFileName = getOutputFileName("phylip",variables);
906 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
908 m->openOutputFile(distFileName, outSTD);
909 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
911 printSims(outSTD, stdmatrix);
920 catch(exception& e) {
921 m->errorOut(e, "SummarySharedCommand", "process");
925 /**************************************************************************************************/
926 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
929 //loop through calculators and add to file all for all calcs that can do mutiple groups
932 m->openOutputFile(sumAllFile, outAll);
935 outAll << thisLookup[0]->getLabel() << '\t';
937 //output groups names
938 string outNames = "";
939 for (int j = 0; j < thisLookup.size(); j++) {
940 outNames += thisLookup[j]->getGroup() + "-";
942 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
943 outAll << outNames << '\t';
945 for(int i=0;i<sumCalculators.size();i++){
946 if (sumCalculators[i]->getMultiple() == true) {
947 sumCalculators[i]->getValues(thisLookup);
949 if (m->control_pressed) { outAll.close(); return 1; }
952 sumCalculators[i]->print(outAll);
959 ofstream outputFileHandle;
960 m->openOutputFile(sumFile, outputFileHandle);
962 vector<SharedRAbundVector*> subset;
963 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
965 for (int l = 0; l < k; l++) {
967 outputFileHandle << thisLookup[0]->getLabel() << '\t';
969 subset.clear(); //clear out old pair of sharedrabunds
970 //add new pair of sharedrabunds
971 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
973 //sort groups to be alphanumeric
974 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
975 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
977 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
980 for(int i=0;i<sumCalculators.size();i++) {
982 //if this calc needs all groups to calculate the pair load all groups
983 if (sumCalculators[i]->getNeedsAll()) {
984 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
985 for (int w = 0; w < thisLookup.size(); w++) {
986 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
990 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
992 if (m->control_pressed) { outputFileHandle.close(); return 1; }
994 outputFileHandle << '\t';
995 sumCalculators[i]->print(outputFileHandle);
997 seqDist temp(l, k, tempdata[0]);
998 calcDists[i].push_back(temp);
1000 outputFileHandle << endl;
1004 outputFileHandle.close();
1008 catch(exception& e) {
1009 m->errorOut(e, "SummarySharedCommand", "driver");
1013 /**************************************************************************************************/