2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
71 if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; }
72 else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["summary"] = tempOutNames;
89 outputTypes["phylip"] = tempOutNames;
92 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
96 //**********************************************************************************************************************
98 SummarySharedCommand::SummarySharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();
110 OptionParser parser(option);
111 map<string, string> parameters = parser.getParameters();
112 map<string, string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["summary"] = tempOutNames;
124 outputTypes["phylip"] = tempOutNames;
126 //if the user changes the input directory command factory will send this info to us in the output parameter
127 string inputDir = validParameter.validFile(parameters, "inputdir", false);
128 if (inputDir == "not found"){ inputDir = ""; }
131 it = parameters.find("shared");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 sharedfile = validParameter.validFile(parameters, "shared", true);
142 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
143 else if (sharedfile == "not found") {
144 //if there is a current shared file, use it
145 sharedfile = m->getSharedFile();
146 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setSharedFile(sharedfile); }
151 //if the user changes the output directory command factory will send this info to us in the output parameter
152 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
165 calc = validParameter.validFile(parameters, "calc", false);
166 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
168 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
170 m->splitAtDash(calc, Estimators);
171 if (m->inUsersGroups("citation", Estimators)) {
172 ValidCalculators validCalc; validCalc.printCitations(Estimators);
173 //remove citation from list of calcs
174 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
177 groups = validParameter.validFile(parameters, "groups", false);
178 if (groups == "not found") { groups = ""; }
180 m->splitAtDash(groups, Groups);
181 m->setGroups(Groups);
184 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
185 all = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
188 createPhylip = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
191 m->mothurConvert(temp, iters);
193 output = validParameter.validFile(parameters, "output", false);
194 if(output == "not found"){ output = "lt"; }
195 else { createPhylip = true; }
196 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
198 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
199 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
201 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
202 else { subsample = false; }
205 if (subsample == false) { iters = 0; }
207 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
208 m->setProcessors(temp);
209 m->mothurConvert(temp, processors);
211 if (abort == false) {
213 ValidCalculators validCalculator;
216 for (i=0; i<Estimators.size(); i++) {
217 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
218 if (Estimators[i] == "sharedsobs") {
219 sumCalculators.push_back(new SharedSobsCS());
220 }else if (Estimators[i] == "sharedchao") {
221 sumCalculators.push_back(new SharedChao1());
222 }else if (Estimators[i] == "sharedace") {
223 sumCalculators.push_back(new SharedAce());
224 }else if (Estimators[i] == "jabund") {
225 sumCalculators.push_back(new JAbund());
226 }else if (Estimators[i] == "sorabund") {
227 sumCalculators.push_back(new SorAbund());
228 }else if (Estimators[i] == "jclass") {
229 sumCalculators.push_back(new Jclass());
230 }else if (Estimators[i] == "sorclass") {
231 sumCalculators.push_back(new SorClass());
232 }else if (Estimators[i] == "jest") {
233 sumCalculators.push_back(new Jest());
234 }else if (Estimators[i] == "sorest") {
235 sumCalculators.push_back(new SorEst());
236 }else if (Estimators[i] == "thetayc") {
237 sumCalculators.push_back(new ThetaYC());
238 }else if (Estimators[i] == "thetan") {
239 sumCalculators.push_back(new ThetaN());
240 }else if (Estimators[i] == "kstest") {
241 sumCalculators.push_back(new KSTest());
242 }else if (Estimators[i] == "sharednseqs") {
243 sumCalculators.push_back(new SharedNSeqs());
244 }else if (Estimators[i] == "ochiai") {
245 sumCalculators.push_back(new Ochiai());
246 }else if (Estimators[i] == "anderberg") {
247 sumCalculators.push_back(new Anderberg());
248 }else if (Estimators[i] == "kulczynski") {
249 sumCalculators.push_back(new Kulczynski());
250 }else if (Estimators[i] == "kulczynskicody") {
251 sumCalculators.push_back(new KulczynskiCody());
252 }else if (Estimators[i] == "lennon") {
253 sumCalculators.push_back(new Lennon());
254 }else if (Estimators[i] == "morisitahorn") {
255 sumCalculators.push_back(new MorHorn());
256 }else if (Estimators[i] == "braycurtis") {
257 sumCalculators.push_back(new BrayCurtis());
258 }else if (Estimators[i] == "whittaker") {
259 sumCalculators.push_back(new Whittaker());
260 }else if (Estimators[i] == "odum") {
261 sumCalculators.push_back(new Odum());
262 }else if (Estimators[i] == "canberra") {
263 sumCalculators.push_back(new Canberra());
264 }else if (Estimators[i] == "structeuclidean") {
265 sumCalculators.push_back(new StructEuclidean());
266 }else if (Estimators[i] == "structchord") {
267 sumCalculators.push_back(new StructChord());
268 }else if (Estimators[i] == "hellinger") {
269 sumCalculators.push_back(new Hellinger());
270 }else if (Estimators[i] == "manhattan") {
271 sumCalculators.push_back(new Manhattan());
272 }else if (Estimators[i] == "structpearson") {
273 sumCalculators.push_back(new StructPearson());
274 }else if (Estimators[i] == "soergel") {
275 sumCalculators.push_back(new Soergel());
276 }else if (Estimators[i] == "spearman") {
277 sumCalculators.push_back(new Spearman());
278 }else if (Estimators[i] == "structkulczynski") {
279 sumCalculators.push_back(new StructKulczynski());
280 }else if (Estimators[i] == "speciesprofile") {
281 sumCalculators.push_back(new SpeciesProfile());
282 }else if (Estimators[i] == "hamming") {
283 sumCalculators.push_back(new Hamming());
284 }else if (Estimators[i] == "structchi2") {
285 sumCalculators.push_back(new StructChi2());
286 }else if (Estimators[i] == "gower") {
287 sumCalculators.push_back(new Gower());
288 }else if (Estimators[i] == "memchi2") {
289 sumCalculators.push_back(new MemChi2());
290 }else if (Estimators[i] == "memchord") {
291 sumCalculators.push_back(new MemChord());
292 }else if (Estimators[i] == "memeuclidean") {
293 sumCalculators.push_back(new MemEuclidean());
294 }else if (Estimators[i] == "mempearson") {
295 sumCalculators.push_back(new MemPearson());
304 catch(exception& e) {
305 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
309 //**********************************************************************************************************************
311 int SummarySharedCommand::execute(){
314 if (abort == true) { if (calledHelp) { return 0; } return 2; }
316 ofstream outputFileHandle, outAll;
317 map<string, string> variables;
318 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
319 string outputFileName = getOutputFileName("summary",variables);
321 //if the users entered no valid calculators don't execute command
322 if (sumCalculators.size() == 0) { return 0; }
323 //check if any calcs can do multiples
326 for (int i = 0; i < sumCalculators.size(); i++) {
327 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
332 input = new InputData(sharedfile, "sharedfile");
333 lookup = input->getSharedRAbundVectors();
334 string lastLabel = lookup[0]->getLabel();
336 /******************************************************/
337 //output headings for files
338 /******************************************************/
339 //output estimator names as column headers
340 m->openOutputFile(outputFileName, outputFileHandle);
341 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
342 for(int i=0;i<sumCalculators.size();i++){
343 outputFileHandle << '\t' << sumCalculators[i]->getName();
344 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
346 outputFileHandle << endl;
347 outputFileHandle.close();
349 //create file and put column headers for multiple groups file
350 variables["[tag]"]= "multiple";
351 string outAllFileName = getOutputFileName("summary",variables);
353 m->openOutputFile(outAllFileName, outAll);
354 outputNames.push_back(outAllFileName);
356 outAll << "label" <<'\t' << "comparison" << '\t';
357 for(int i=0;i<sumCalculators.size();i++){
358 if (sumCalculators[i]->getMultiple() == true) {
359 outAll << '\t' << sumCalculators[i]->getName();
366 if (lookup.size() < 2) {
367 m->mothurOut("I cannot run the command without at least 2 valid groups.");
368 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
370 //close files and clean up
371 m->mothurRemove(outputFileName);
372 if (mult == true) { m->mothurRemove(outAllFileName); }
374 //if you only have 2 groups you don't need a .sharedmultiple file
375 }else if ((lookup.size() == 2) && (mult == true)) {
377 m->mothurRemove(outAllFileName);
378 outputNames.pop_back();
381 if (m->control_pressed) {
382 if (mult) { m->mothurRemove(outAllFileName); }
383 m->mothurRemove(outputFileName);
385 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
390 /******************************************************/
392 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393 subsampleSize = lookup[0]->getNumSeqs();
394 for (int i = 1; i < lookup.size(); i++) {
395 int thisSize = lookup[i]->getNumSeqs();
397 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
402 vector<SharedRAbundVector*> temp;
403 for (int i = 0; i < lookup.size(); i++) {
404 if (lookup[i]->getNumSeqs() < subsampleSize) {
405 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
408 Groups.push_back(lookup[i]->getGroup());
409 temp.push_back(lookup[i]);
413 m->setGroups(Groups);
416 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
420 /******************************************************/
421 //comparison breakup to be used by different processes later
422 numGroups = lookup.size();
423 lines.resize(processors);
424 for (int i = 0; i < processors; i++) {
425 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
428 /******************************************************/
430 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431 set<string> processedLabels;
432 set<string> userLabels = labels;
434 //as long as you are not at the end of the file or done wih the lines you want
435 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436 if (m->control_pressed) {
437 if (mult) { m->mothurRemove(outAllFileName); }
438 m->mothurRemove(outputFileName);
440 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
447 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
448 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449 process(lookup, outputFileName, outAllFileName);
451 processedLabels.insert(lookup[0]->getLabel());
452 userLabels.erase(lookup[0]->getLabel());
455 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456 string saveLabel = lookup[0]->getLabel();
458 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
459 lookup = input->getSharedRAbundVectors(lastLabel);
461 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462 process(lookup, outputFileName, outAllFileName);
464 processedLabels.insert(lookup[0]->getLabel());
465 userLabels.erase(lookup[0]->getLabel());
467 //restore real lastlabel to save below
468 lookup[0]->setLabel(saveLabel);
471 lastLabel = lookup[0]->getLabel();
473 //get next line to process
474 //prevent memory leak
475 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
476 lookup = input->getSharedRAbundVectors();
479 if (m->control_pressed) {
480 if (mult) { m->mothurRemove(outAllFileName); }
481 m->mothurRemove(outputFileName);
483 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
488 //output error messages about any remaining user labels
489 set<string>::iterator it;
490 bool needToRun = false;
491 for (it = userLabels.begin(); it != userLabels.end(); it++) {
492 m->mothurOut("Your file does not include the label " + *it);
493 if (processedLabels.count(lastLabel) != 1) {
494 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
497 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
501 //run last label if you need to
502 if (needToRun == true) {
503 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
504 lookup = input->getSharedRAbundVectors(lastLabel);
506 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507 process(lookup, outputFileName, outAllFileName);
508 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
512 //reset groups parameter
515 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
518 if (m->control_pressed) {
519 m->mothurRemove(outAllFileName);
520 m->mothurRemove(outputFileName);
524 m->mothurOutEndLine();
525 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526 m->mothurOut(outputFileName); m->mothurOutEndLine();
527 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
528 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
529 m->mothurOutEndLine();
533 catch(exception& e) {
534 m->errorOut(e, "SummarySharedCommand", "execute");
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
542 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
545 out << simMatrix.size() << endl;
547 if (output == "lt") {
548 for (int b = 0; b < simMatrix.size(); b++) {
549 out << lookup[b]->getGroup() << '\t';
550 for (int n = 0; n < b; n++) {
551 if (m->control_pressed) { return 0; }
552 out << simMatrix[b][n] << '\t';
557 for (int b = 0; b < simMatrix.size(); m++) {
558 out << lookup[b]->getGroup() << '\t';
559 for (int n = 0; n < simMatrix[b].size(); n++) {
560 if (m->control_pressed) { return 0; }
561 out << simMatrix[b][n] << '\t';
569 catch(exception& e) {
570 m->errorOut(e, "SummarySharedCommand", "printSims");
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
577 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
580 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
582 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
584 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
586 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
588 //make copy of lookup so we don't get access violations
589 vector<SharedRAbundVector*> newLookup;
590 for (int k = 0; k < thisItersLookup.size(); k++) {
591 SharedRAbundVector* temp = new SharedRAbundVector();
592 temp->setLabel(thisItersLookup[k]->getLabel());
593 temp->setGroup(thisItersLookup[k]->getGroup());
594 newLookup.push_back(temp);
598 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
600 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
603 tempLabels = sample.getSample(newLookup, subsampleSize);
604 thisItersLookup = newLookup;
609 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610 m->appendFiles((sumFileName + ".temp"), sumFileName);
611 m->mothurRemove((sumFileName + ".temp"));
613 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614 m->mothurRemove((sumAllFileName + ".temp"));
619 vector<int> processIDS;
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622 //loop through and create all the processes you want
623 while (process != processors) {
627 processIDS.push_back(pid);
630 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
632 //only do this if you want a distance file
634 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
636 m->openOutputFile(tempdistFileName, outtemp);
638 for (int i = 0; i < calcDists.size(); i++) {
639 outtemp << calcDists[i].size() << endl;
641 for (int j = 0; j < calcDists[i].size(); j++) {
642 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
650 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
651 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
656 //parent do your part
657 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
658 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
662 //force parent to wait until all the processes are done
663 for (int i = 0; i < processIDS.size(); i++) {
664 int temp = processIDS[i];
668 for (int i = 0; i < processIDS.size(); i++) {
669 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
674 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
676 m->openInputFile(tempdistFileName, intemp);
678 for (int k = 0; k < calcDists.size(); k++) {
680 intemp >> size; m->gobble(intemp);
682 for (int j = 0; j < size; j++) {
687 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
689 seqDist tempDist(seq1, seq2, dist);
690 calcDists[k].push_back(tempDist);
694 m->mothurRemove(tempdistFileName);
698 //////////////////////////////////////////////////////////////////////////////////////////////////////
699 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
700 //Above fork() will clone, so memory is separate, but that's not the case with windows,
701 //Taking advantage of shared memory to pass results vectors.
702 //////////////////////////////////////////////////////////////////////////////////////////////////////
704 vector<summarySharedData*> pDataArray;
705 DWORD dwThreadIdArray[processors-1];
706 HANDLE hThreadArray[processors-1];
708 //Create processor worker threads.
709 for( int i=1; i<processors; i++ ){
711 //make copy of lookup so we don't get access violations
712 vector<SharedRAbundVector*> newLookup;
713 for (int k = 0; k < thisLookup.size(); k++) {
714 SharedRAbundVector* temp = new SharedRAbundVector();
715 temp->setLabel(thisLookup[k]->getLabel());
716 temp->setGroup(thisLookup[k]->getGroup());
717 newLookup.push_back(temp);
722 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
723 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
724 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
727 // Allocate memory for thread data.
728 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
729 pDataArray.push_back(tempSum);
730 processIDS.push_back(i);
732 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
735 //parent do your part
736 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
737 m->appendFiles((sumFileName + "0.temp"), sumFileName);
738 m->mothurRemove((sumFileName + "0.temp"));
739 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
741 //Wait until all threads have terminated.
742 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
744 //Close all thread handles and free memory allocations.
745 for(int i=0; i < pDataArray.size(); i++){
746 if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
747 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
749 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
750 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
752 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
755 for (int k = 0; k < calcDists.size(); k++) {
756 int size = pDataArray[i]->calcDists[k].size();
757 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
761 CloseHandle(hThreadArray[i]);
762 delete pDataArray[i];
768 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
770 calcDistsTotals.push_back(calcDists);
772 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
773 thisItersLookup.clear();
776 for (int i = 0; i < calcDists.size(); i++) {
777 if (m->control_pressed) { break; }
780 vector< vector<double> > matrix; //square matrix to represent the distance
781 matrix.resize(thisLookup.size());
782 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
784 for (int j = 0; j < calcDists[i].size(); j++) {
785 int row = calcDists[i][j].seq1;
786 int column = calcDists[i][j].seq2;
787 double dist = calcDists[i][j].dist;
789 matrix[row][column] = dist;
790 matrix[column][row] = dist;
793 map<string, string> variables;
794 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
795 variables["[calc]"] = sumCalculators[i]->getName();
796 variables["[distance]"] = thisLookup[0]->getLabel();
797 variables["[outputtag]"] = output;
798 variables["[tag2]"] = "";
799 string distFileName = getOutputFileName("phylip",variables);
800 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
802 m->openOutputFile(distFileName, outDist);
803 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
805 printSims(outDist, matrix);
811 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
815 //we need to find the average distance and standard deviation for each groups distance
816 vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
818 //find standard deviation
819 vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
822 for (int i = 0; i < calcDists.size(); i++) {
823 vector< vector<double> > matrix; //square matrix to represent the distance
824 matrix.resize(thisLookup.size());
825 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
827 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
828 stdmatrix.resize(thisLookup.size());
829 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
832 for (int j = 0; j < calcAverages[i].size(); j++) {
833 int row = calcAverages[i][j].seq1;
834 int column = calcAverages[i][j].seq2;
835 float dist = calcAverages[i][j].dist;
836 float stdDist = stdDev[i][j].dist;
838 matrix[row][column] = dist;
839 matrix[column][row] = dist;
840 stdmatrix[row][column] = stdDist;
841 stdmatrix[column][row] = stdDist;
844 map<string, string> variables;
845 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
846 variables["[calc]"] = sumCalculators[i]->getName();
847 variables["[distance]"] = thisLookup[0]->getLabel();
848 variables["[outputtag]"] = output;
849 variables["[tag2]"] = "ave";
850 string distFileName = getOutputFileName("phylip",variables);
851 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
853 m->openOutputFile(distFileName, outAve);
854 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
856 printSims(outAve, matrix);
860 variables["[tag2]"] = "std";
861 distFileName = getOutputFileName("phylip",variables);
862 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
864 m->openOutputFile(distFileName, outSTD);
865 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
867 printSims(outSTD, stdmatrix);
876 catch(exception& e) {
877 m->errorOut(e, "SummarySharedCommand", "process");
881 /**************************************************************************************************/
882 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
885 //loop through calculators and add to file all for all calcs that can do mutiple groups
888 m->openOutputFile(sumAllFile, outAll);
891 outAll << thisLookup[0]->getLabel() << '\t';
893 //output groups names
894 string outNames = "";
895 for (int j = 0; j < thisLookup.size(); j++) {
896 outNames += thisLookup[j]->getGroup() + "-";
898 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
899 outAll << outNames << '\t';
901 for(int i=0;i<sumCalculators.size();i++){
902 if (sumCalculators[i]->getMultiple() == true) {
903 sumCalculators[i]->getValues(thisLookup);
905 if (m->control_pressed) { outAll.close(); return 1; }
908 sumCalculators[i]->print(outAll);
915 ofstream outputFileHandle;
916 m->openOutputFile(sumFile, outputFileHandle);
918 vector<SharedRAbundVector*> subset;
919 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
921 for (int l = 0; l < k; l++) {
923 outputFileHandle << thisLookup[0]->getLabel() << '\t';
925 subset.clear(); //clear out old pair of sharedrabunds
926 //add new pair of sharedrabunds
927 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
929 //sort groups to be alphanumeric
930 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
931 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
933 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
936 for(int i=0;i<sumCalculators.size();i++) {
938 //if this calc needs all groups to calculate the pair load all groups
939 if (sumCalculators[i]->getNeedsAll()) {
940 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
941 for (int w = 0; w < thisLookup.size(); w++) {
942 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
946 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
948 if (m->control_pressed) { outputFileHandle.close(); return 1; }
950 outputFileHandle << '\t';
951 sumCalculators[i]->print(outputFileHandle);
953 seqDist temp(l, k, tempdata[0]);
954 calcDists[i].push_back(temp);
956 outputFileHandle << endl;
960 outputFileHandle.close();
964 catch(exception& e) {
965 m->errorOut(e, "SummarySharedCommand", "driver");
969 /**************************************************************************************************/