2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
71 if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; }
72 else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["summary"] = tempOutNames;
89 outputTypes["phylip"] = tempOutNames;
92 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
96 //**********************************************************************************************************************
98 SummarySharedCommand::SummarySharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();
110 OptionParser parser(option);
111 map<string, string> parameters = parser.getParameters();
112 map<string, string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["summary"] = tempOutNames;
124 outputTypes["phylip"] = tempOutNames;
126 //if the user changes the input directory command factory will send this info to us in the output parameter
127 string inputDir = validParameter.validFile(parameters, "inputdir", false);
128 if (inputDir == "not found"){ inputDir = ""; }
131 it = parameters.find("shared");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 sharedfile = validParameter.validFile(parameters, "shared", true);
142 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
143 else if (sharedfile == "not found") {
144 //if there is a current shared file, use it
145 sharedfile = m->getSharedFile();
146 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setSharedFile(sharedfile); }
151 //if the user changes the output directory command factory will send this info to us in the output parameter
152 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
165 calc = validParameter.validFile(parameters, "calc", false);
166 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
168 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
170 m->splitAtDash(calc, Estimators);
171 if (m->inUsersGroups("citation", Estimators)) {
172 ValidCalculators validCalc; validCalc.printCitations(Estimators);
173 //remove citation from list of calcs
174 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
177 groups = validParameter.validFile(parameters, "groups", false);
178 if (groups == "not found") { groups = ""; }
180 m->splitAtDash(groups, Groups);
181 m->setGroups(Groups);
184 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
185 all = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
188 m->mothurConvert(temp, iters);
190 output = validParameter.validFile(parameters, "output", false);
191 if(output == "not found"){ output = "lt"; }
192 else { createPhylip = true; }
193 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
195 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
196 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
199 else { subsample = false; }
202 if (subsample == false) { iters = 0; }
204 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
205 createPhylip = m->isTrue(temp);
206 if (subsample) { createPhylip = true; }
208 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
209 m->setProcessors(temp);
210 m->mothurConvert(temp, processors);
212 if (abort == false) {
214 ValidCalculators validCalculator;
217 for (i=0; i<Estimators.size(); i++) {
218 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
219 if (Estimators[i] == "sharedsobs") {
220 sumCalculators.push_back(new SharedSobsCS());
221 }else if (Estimators[i] == "sharedchao") {
222 sumCalculators.push_back(new SharedChao1());
223 }else if (Estimators[i] == "sharedace") {
224 sumCalculators.push_back(new SharedAce());
225 }else if (Estimators[i] == "jabund") {
226 sumCalculators.push_back(new JAbund());
227 }else if (Estimators[i] == "sorabund") {
228 sumCalculators.push_back(new SorAbund());
229 }else if (Estimators[i] == "jclass") {
230 sumCalculators.push_back(new Jclass());
231 }else if (Estimators[i] == "sorclass") {
232 sumCalculators.push_back(new SorClass());
233 }else if (Estimators[i] == "jest") {
234 sumCalculators.push_back(new Jest());
235 }else if (Estimators[i] == "sorest") {
236 sumCalculators.push_back(new SorEst());
237 }else if (Estimators[i] == "thetayc") {
238 sumCalculators.push_back(new ThetaYC());
239 }else if (Estimators[i] == "thetan") {
240 sumCalculators.push_back(new ThetaN());
241 }else if (Estimators[i] == "kstest") {
242 sumCalculators.push_back(new KSTest());
243 }else if (Estimators[i] == "sharednseqs") {
244 sumCalculators.push_back(new SharedNSeqs());
245 }else if (Estimators[i] == "ochiai") {
246 sumCalculators.push_back(new Ochiai());
247 }else if (Estimators[i] == "anderberg") {
248 sumCalculators.push_back(new Anderberg());
249 }else if (Estimators[i] == "kulczynski") {
250 sumCalculators.push_back(new Kulczynski());
251 }else if (Estimators[i] == "kulczynskicody") {
252 sumCalculators.push_back(new KulczynskiCody());
253 }else if (Estimators[i] == "lennon") {
254 sumCalculators.push_back(new Lennon());
255 }else if (Estimators[i] == "morisitahorn") {
256 sumCalculators.push_back(new MorHorn());
257 }else if (Estimators[i] == "braycurtis") {
258 sumCalculators.push_back(new BrayCurtis());
259 }else if (Estimators[i] == "whittaker") {
260 sumCalculators.push_back(new Whittaker());
261 }else if (Estimators[i] == "odum") {
262 sumCalculators.push_back(new Odum());
263 }else if (Estimators[i] == "canberra") {
264 sumCalculators.push_back(new Canberra());
265 }else if (Estimators[i] == "structeuclidean") {
266 sumCalculators.push_back(new StructEuclidean());
267 }else if (Estimators[i] == "structchord") {
268 sumCalculators.push_back(new StructChord());
269 }else if (Estimators[i] == "hellinger") {
270 sumCalculators.push_back(new Hellinger());
271 }else if (Estimators[i] == "manhattan") {
272 sumCalculators.push_back(new Manhattan());
273 }else if (Estimators[i] == "structpearson") {
274 sumCalculators.push_back(new StructPearson());
275 }else if (Estimators[i] == "soergel") {
276 sumCalculators.push_back(new Soergel());
277 }else if (Estimators[i] == "spearman") {
278 sumCalculators.push_back(new Spearman());
279 }else if (Estimators[i] == "structkulczynski") {
280 sumCalculators.push_back(new StructKulczynski());
281 }else if (Estimators[i] == "speciesprofile") {
282 sumCalculators.push_back(new SpeciesProfile());
283 }else if (Estimators[i] == "hamming") {
284 sumCalculators.push_back(new Hamming());
285 }else if (Estimators[i] == "structchi2") {
286 sumCalculators.push_back(new StructChi2());
287 }else if (Estimators[i] == "gower") {
288 sumCalculators.push_back(new Gower());
289 }else if (Estimators[i] == "memchi2") {
290 sumCalculators.push_back(new MemChi2());
291 }else if (Estimators[i] == "memchord") {
292 sumCalculators.push_back(new MemChord());
293 }else if (Estimators[i] == "memeuclidean") {
294 sumCalculators.push_back(new MemEuclidean());
295 }else if (Estimators[i] == "mempearson") {
296 sumCalculators.push_back(new MemPearson());
305 catch(exception& e) {
306 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
310 //**********************************************************************************************************************
312 int SummarySharedCommand::execute(){
315 if (abort == true) { if (calledHelp) { return 0; } return 2; }
317 ofstream outputFileHandle, outAll;
318 map<string, string> variables;
319 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
320 string outputFileName = getOutputFileName("summary",variables);
322 //if the users entered no valid calculators don't execute command
323 if (sumCalculators.size() == 0) { return 0; }
324 //check if any calcs can do multiples
327 for (int i = 0; i < sumCalculators.size(); i++) {
328 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
333 input = new InputData(sharedfile, "sharedfile");
334 lookup = input->getSharedRAbundVectors();
335 string lastLabel = lookup[0]->getLabel();
337 /******************************************************/
338 //output headings for files
339 /******************************************************/
340 //output estimator names as column headers
341 m->openOutputFile(outputFileName, outputFileHandle);
342 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
343 for(int i=0;i<sumCalculators.size();i++){
344 outputFileHandle << '\t' << sumCalculators[i]->getName();
345 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
347 outputFileHandle << endl;
348 outputFileHandle.close();
350 //create file and put column headers for multiple groups file
351 variables["[tag]"]= "multiple";
352 string outAllFileName = getOutputFileName("summary",variables);
354 m->openOutputFile(outAllFileName, outAll);
355 outputNames.push_back(outAllFileName);
357 outAll << "label" <<'\t' << "comparison" << '\t';
358 for(int i=0;i<sumCalculators.size();i++){
359 if (sumCalculators[i]->getMultiple() == true) {
360 outAll << '\t' << sumCalculators[i]->getName();
367 if (lookup.size() < 2) {
368 m->mothurOut("I cannot run the command without at least 2 valid groups.");
369 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
371 //close files and clean up
372 m->mothurRemove(outputFileName);
373 if (mult == true) { m->mothurRemove(outAllFileName); }
375 //if you only have 2 groups you don't need a .sharedmultiple file
376 }else if ((lookup.size() == 2) && (mult == true)) {
378 m->mothurRemove(outAllFileName);
379 outputNames.pop_back();
382 if (m->control_pressed) {
383 if (mult) { m->mothurRemove(outAllFileName); }
384 m->mothurRemove(outputFileName);
386 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
387 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
391 /******************************************************/
393 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
394 subsampleSize = lookup[0]->getNumSeqs();
395 for (int i = 1; i < lookup.size(); i++) {
396 int thisSize = lookup[i]->getNumSeqs();
398 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
403 vector<SharedRAbundVector*> temp;
404 for (int i = 0; i < lookup.size(); i++) {
405 if (lookup[i]->getNumSeqs() < subsampleSize) {
406 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
409 Groups.push_back(lookup[i]->getGroup());
410 temp.push_back(lookup[i]);
414 m->setGroups(Groups);
417 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
421 /******************************************************/
422 //comparison breakup to be used by different processes later
423 numGroups = lookup.size();
424 lines.resize(processors);
425 for (int i = 0; i < processors; i++) {
426 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
427 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
429 /******************************************************/
431 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
432 set<string> processedLabels;
433 set<string> userLabels = labels;
435 //as long as you are not at the end of the file or done wih the lines you want
436 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
437 if (m->control_pressed) {
438 if (mult) { m->mothurRemove(outAllFileName); }
439 m->mothurRemove(outputFileName);
441 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
442 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
448 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
449 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
450 process(lookup, outputFileName, outAllFileName);
452 processedLabels.insert(lookup[0]->getLabel());
453 userLabels.erase(lookup[0]->getLabel());
456 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
457 string saveLabel = lookup[0]->getLabel();
459 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
460 lookup = input->getSharedRAbundVectors(lastLabel);
462 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
463 process(lookup, outputFileName, outAllFileName);
465 processedLabels.insert(lookup[0]->getLabel());
466 userLabels.erase(lookup[0]->getLabel());
468 //restore real lastlabel to save below
469 lookup[0]->setLabel(saveLabel);
472 lastLabel = lookup[0]->getLabel();
474 //get next line to process
475 //prevent memory leak
476 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
477 lookup = input->getSharedRAbundVectors();
480 if (m->control_pressed) {
481 if (mult) { m->mothurRemove(outAllFileName); }
482 m->mothurRemove(outputFileName);
484 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
489 //output error messages about any remaining user labels
490 set<string>::iterator it;
491 bool needToRun = false;
492 for (it = userLabels.begin(); it != userLabels.end(); it++) {
493 m->mothurOut("Your file does not include the label " + *it);
494 if (processedLabels.count(lastLabel) != 1) {
495 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
498 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
502 //run last label if you need to
503 if (needToRun == true) {
504 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
505 lookup = input->getSharedRAbundVectors(lastLabel);
507 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
508 process(lookup, outputFileName, outAllFileName);
509 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
513 //reset groups parameter
516 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
519 if (m->control_pressed) {
520 m->mothurRemove(outAllFileName);
521 m->mothurRemove(outputFileName);
525 m->mothurOutEndLine();
526 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
527 m->mothurOut(outputFileName); m->mothurOutEndLine();
528 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
529 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
530 m->mothurOutEndLine();
534 catch(exception& e) {
535 m->errorOut(e, "SummarySharedCommand", "execute");
539 /***********************************************************/
540 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
543 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
546 out << simMatrix.size() << endl;
548 if (output == "lt") {
549 for (int b = 0; b < simMatrix.size(); b++) {
550 out << lookup[b]->getGroup() << '\t';
551 for (int n = 0; n < b; n++) {
552 if (m->control_pressed) { return 0; }
553 out << simMatrix[b][n] << '\t';
558 for (int b = 0; b < simMatrix.size(); m++) {
559 out << lookup[b]->getGroup() << '\t';
560 for (int n = 0; n < simMatrix[b].size(); n++) {
561 if (m->control_pressed) { return 0; }
562 out << simMatrix[b][n] << '\t';
570 catch(exception& e) {
571 m->errorOut(e, "SummarySharedCommand", "printSims");
575 /***********************************************************/
576 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
578 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
579 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
581 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
583 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
585 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
587 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
589 //make copy of lookup so we don't get access violations
590 vector<SharedRAbundVector*> newLookup;
591 for (int k = 0; k < thisItersLookup.size(); k++) {
592 SharedRAbundVector* temp = new SharedRAbundVector();
593 temp->setLabel(thisItersLookup[k]->getLabel());
594 temp->setGroup(thisItersLookup[k]->getGroup());
595 newLookup.push_back(temp);
599 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
600 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
601 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
604 tempLabels = sample.getSample(newLookup, subsampleSize);
605 thisItersLookup = newLookup;
610 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
611 m->appendFiles((sumFileName + ".temp"), sumFileName);
612 m->mothurRemove((sumFileName + ".temp"));
614 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
615 m->mothurRemove((sumAllFileName + ".temp"));
620 vector<int> processIDS;
622 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
623 //loop through and create all the processes you want
624 while (process != processors) {
628 processIDS.push_back(pid);
631 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
633 //only do this if you want a distance file
635 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
637 m->openOutputFile(tempdistFileName, outtemp);
639 for (int i = 0; i < calcDists.size(); i++) {
640 outtemp << calcDists[i].size() << endl;
642 for (int j = 0; j < calcDists[i].size(); j++) {
643 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
651 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
652 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
657 //parent do your part
658 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
659 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
660 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
661 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
663 //force parent to wait until all the processes are done
664 for (int i = 0; i < processIDS.size(); i++) {
665 int temp = processIDS[i];
669 for (int i = 0; i < processIDS.size(); i++) {
670 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
671 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
672 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
675 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
677 m->openInputFile(tempdistFileName, intemp);
679 for (int k = 0; k < calcDists.size(); k++) {
681 intemp >> size; m->gobble(intemp);
683 for (int j = 0; j < size; j++) {
688 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
690 seqDist tempDist(seq1, seq2, dist);
691 calcDists[k].push_back(tempDist);
695 m->mothurRemove(tempdistFileName);
699 //////////////////////////////////////////////////////////////////////////////////////////////////////
700 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
701 //Above fork() will clone, so memory is separate, but that's not the case with windows,
702 //Taking advantage of shared memory to pass results vectors.
703 //////////////////////////////////////////////////////////////////////////////////////////////////////
705 vector<summarySharedData*> pDataArray;
706 DWORD dwThreadIdArray[processors-1];
707 HANDLE hThreadArray[processors-1];
709 //Create processor worker threads.
710 for( int i=1; i<processors; i++ ){
712 //make copy of lookup so we don't get access violations
713 vector<SharedRAbundVector*> newLookup;
714 for (int k = 0; k < thisLookup.size(); k++) {
715 SharedRAbundVector* temp = new SharedRAbundVector();
716 temp->setLabel(thisLookup[k]->getLabel());
717 temp->setGroup(thisLookup[k]->getGroup());
718 newLookup.push_back(temp);
723 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
724 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
725 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
728 // Allocate memory for thread data.
729 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
730 pDataArray.push_back(tempSum);
731 processIDS.push_back(i);
733 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
736 //parent do your part
737 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
738 m->appendFiles((sumFileName + "0.temp"), sumFileName);
739 m->mothurRemove((sumFileName + "0.temp"));
740 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
742 //Wait until all threads have terminated.
743 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
745 //Close all thread handles and free memory allocations.
746 for(int i=0; i < pDataArray.size(); i++){
747 if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
748 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
750 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
751 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
753 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
756 for (int k = 0; k < calcDists.size(); k++) {
757 int size = pDataArray[i]->calcDists[k].size();
758 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
762 CloseHandle(hThreadArray[i]);
763 delete pDataArray[i];
769 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
771 calcDistsTotals.push_back(calcDists);
773 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
774 thisItersLookup.clear();
777 for (int i = 0; i < calcDists.size(); i++) {
778 if (m->control_pressed) { break; }
781 vector< vector<double> > matrix; //square matrix to represent the distance
782 matrix.resize(thisLookup.size());
783 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
785 for (int j = 0; j < calcDists[i].size(); j++) {
786 int row = calcDists[i][j].seq1;
787 int column = calcDists[i][j].seq2;
788 double dist = calcDists[i][j].dist;
790 matrix[row][column] = dist;
791 matrix[column][row] = dist;
794 map<string, string> variables;
795 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
796 variables["[calc]"] = sumCalculators[i]->getName();
797 variables["[distance]"] = thisLookup[0]->getLabel();
798 variables["[outputtag]"] = output;
799 variables["[tag2]"] = "";
800 string distFileName = getOutputFileName("phylip",variables);
801 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
803 m->openOutputFile(distFileName, outDist);
804 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
806 printSims(outDist, matrix);
812 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
816 //we need to find the average distance and standard deviation for each groups distance
817 vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
819 //find standard deviation
820 vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
823 for (int i = 0; i < calcDists.size(); i++) {
824 vector< vector<double> > matrix; //square matrix to represent the distance
825 matrix.resize(thisLookup.size());
826 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
828 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
829 stdmatrix.resize(thisLookup.size());
830 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
833 for (int j = 0; j < calcAverages[i].size(); j++) {
834 int row = calcAverages[i][j].seq1;
835 int column = calcAverages[i][j].seq2;
836 float dist = calcAverages[i][j].dist;
837 float stdDist = stdDev[i][j].dist;
839 matrix[row][column] = dist;
840 matrix[column][row] = dist;
841 stdmatrix[row][column] = stdDist;
842 stdmatrix[column][row] = stdDist;
845 map<string, string> variables;
846 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
847 variables["[calc]"] = sumCalculators[i]->getName();
848 variables["[distance]"] = thisLookup[0]->getLabel();
849 variables["[outputtag]"] = output;
850 variables["[tag2]"] = "ave";
851 string distFileName = getOutputFileName("phylip",variables);
852 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
854 m->openOutputFile(distFileName, outAve);
855 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
857 printSims(outAve, matrix);
861 variables["[tag2]"] = "std";
862 distFileName = getOutputFileName("phylip",variables);
863 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
865 m->openOutputFile(distFileName, outSTD);
866 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
868 printSims(outSTD, stdmatrix);
877 catch(exception& e) {
878 m->errorOut(e, "SummarySharedCommand", "process");
882 /**************************************************************************************************/
883 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
886 //loop through calculators and add to file all for all calcs that can do mutiple groups
889 m->openOutputFile(sumAllFile, outAll);
892 outAll << thisLookup[0]->getLabel() << '\t';
894 //output groups names
895 string outNames = "";
896 for (int j = 0; j < thisLookup.size(); j++) {
897 outNames += thisLookup[j]->getGroup() + "-";
899 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
900 outAll << outNames << '\t';
902 for(int i=0;i<sumCalculators.size();i++){
903 if (sumCalculators[i]->getMultiple() == true) {
904 sumCalculators[i]->getValues(thisLookup);
906 if (m->control_pressed) { outAll.close(); return 1; }
909 sumCalculators[i]->print(outAll);
916 ofstream outputFileHandle;
917 m->openOutputFile(sumFile, outputFileHandle);
919 vector<SharedRAbundVector*> subset;
920 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
922 for (int l = 0; l < k; l++) {
924 outputFileHandle << thisLookup[0]->getLabel() << '\t';
926 subset.clear(); //clear out old pair of sharedrabunds
927 //add new pair of sharedrabunds
928 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
930 //sort groups to be alphanumeric
931 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
932 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
934 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
937 for(int i=0;i<sumCalculators.size();i++) {
939 //if this calc needs all groups to calculate the pair load all groups
940 if (sumCalculators[i]->getNeedsAll()) {
941 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
942 for (int w = 0; w < thisLookup.size(); w++) {
943 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
947 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
949 if (m->control_pressed) { outputFileHandle.close(); return 1; }
951 outputFileHandle << '\t';
952 sumCalculators[i]->print(outputFileHandle);
954 seqDist temp(l, k, tempdata[0]);
955 calcDists[i].push_back(temp);
957 outputFileHandle << endl;
961 outputFileHandle.close();
965 catch(exception& e) {
966 m->errorOut(e, "SummarySharedCommand", "driver");
970 /**************************************************************************************************/