2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
71 if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; }
72 else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["summary"] = tempOutNames;
89 outputTypes["phylip"] = tempOutNames;
92 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
96 //**********************************************************************************************************************
98 SummarySharedCommand::SummarySharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();
110 OptionParser parser(option);
111 map<string, string> parameters = parser.getParameters();
112 map<string, string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["summary"] = tempOutNames;
124 outputTypes["phylip"] = tempOutNames;
126 //if the user changes the input directory command factory will send this info to us in the output parameter
127 string inputDir = validParameter.validFile(parameters, "inputdir", false);
128 if (inputDir == "not found"){ inputDir = ""; }
131 it = parameters.find("shared");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 sharedfile = validParameter.validFile(parameters, "shared", true);
142 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
143 else if (sharedfile == "not found") {
144 //if there is a current shared file, use it
145 sharedfile = m->getSharedFile();
146 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setSharedFile(sharedfile); }
151 //if the user changes the output directory command factory will send this info to us in the output parameter
152 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
165 calc = validParameter.validFile(parameters, "calc", false);
166 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
168 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
170 m->splitAtDash(calc, Estimators);
171 if (m->inUsersGroups("citation", Estimators)) {
172 ValidCalculators validCalc; validCalc.printCitations(Estimators);
173 //remove citation from list of calcs
174 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
177 groups = validParameter.validFile(parameters, "groups", false);
178 if (groups == "not found") { groups = ""; }
180 m->splitAtDash(groups, Groups);
181 m->setGroups(Groups);
184 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
185 all = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
188 m->mothurConvert(temp, iters);
190 output = validParameter.validFile(parameters, "output", false);
191 if(output == "not found"){ output = "lt"; }
192 else { createPhylip = true; }
193 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
195 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
196 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
199 else { subsample = false; }
202 if (subsample == false) { iters = 0; }
204 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
205 createPhylip = m->isTrue(temp);
206 if (subsample) { createPhylip = true; }
208 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
209 m->setProcessors(temp);
210 m->mothurConvert(temp, processors);
212 if (abort == false) {
214 ValidCalculators validCalculator;
217 for (i=0; i<Estimators.size(); i++) {
218 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
219 if (Estimators[i] == "sharedsobs") {
220 sumCalculators.push_back(new SharedSobsCS());
221 }else if (Estimators[i] == "sharedchao") {
222 sumCalculators.push_back(new SharedChao1());
223 }else if (Estimators[i] == "sharedace") {
224 sumCalculators.push_back(new SharedAce());
225 }else if (Estimators[i] == "jabund") {
226 sumCalculators.push_back(new JAbund());
227 }else if (Estimators[i] == "sorabund") {
228 sumCalculators.push_back(new SorAbund());
229 }else if (Estimators[i] == "jclass") {
230 sumCalculators.push_back(new Jclass());
231 }else if (Estimators[i] == "sorclass") {
232 sumCalculators.push_back(new SorClass());
233 }else if (Estimators[i] == "jest") {
234 sumCalculators.push_back(new Jest());
235 }else if (Estimators[i] == "sorest") {
236 sumCalculators.push_back(new SorEst());
237 }else if (Estimators[i] == "thetayc") {
238 sumCalculators.push_back(new ThetaYC());
239 }else if (Estimators[i] == "thetan") {
240 sumCalculators.push_back(new ThetaN());
241 }else if (Estimators[i] == "kstest") {
242 sumCalculators.push_back(new KSTest());
243 }else if (Estimators[i] == "sharednseqs") {
244 sumCalculators.push_back(new SharedNSeqs());
245 }else if (Estimators[i] == "ochiai") {
246 sumCalculators.push_back(new Ochiai());
247 }else if (Estimators[i] == "anderberg") {
248 sumCalculators.push_back(new Anderberg());
249 }else if (Estimators[i] == "kulczynski") {
250 sumCalculators.push_back(new Kulczynski());
251 }else if (Estimators[i] == "kulczynskicody") {
252 sumCalculators.push_back(new KulczynskiCody());
253 }else if (Estimators[i] == "lennon") {
254 sumCalculators.push_back(new Lennon());
255 }else if (Estimators[i] == "morisitahorn") {
256 sumCalculators.push_back(new MorHorn());
257 }else if (Estimators[i] == "braycurtis") {
258 sumCalculators.push_back(new BrayCurtis());
259 }else if (Estimators[i] == "whittaker") {
260 sumCalculators.push_back(new Whittaker());
261 }else if (Estimators[i] == "odum") {
262 sumCalculators.push_back(new Odum());
263 }else if (Estimators[i] == "canberra") {
264 sumCalculators.push_back(new Canberra());
265 }else if (Estimators[i] == "structeuclidean") {
266 sumCalculators.push_back(new StructEuclidean());
267 }else if (Estimators[i] == "structchord") {
268 sumCalculators.push_back(new StructChord());
269 }else if (Estimators[i] == "hellinger") {
270 sumCalculators.push_back(new Hellinger());
271 }else if (Estimators[i] == "manhattan") {
272 sumCalculators.push_back(new Manhattan());
273 }else if (Estimators[i] == "structpearson") {
274 sumCalculators.push_back(new StructPearson());
275 }else if (Estimators[i] == "soergel") {
276 sumCalculators.push_back(new Soergel());
277 }else if (Estimators[i] == "spearman") {
278 sumCalculators.push_back(new Spearman());
279 }else if (Estimators[i] == "structkulczynski") {
280 sumCalculators.push_back(new StructKulczynski());
281 }else if (Estimators[i] == "speciesprofile") {
282 sumCalculators.push_back(new SpeciesProfile());
283 }else if (Estimators[i] == "hamming") {
284 sumCalculators.push_back(new Hamming());
285 }else if (Estimators[i] == "structchi2") {
286 sumCalculators.push_back(new StructChi2());
287 }else if (Estimators[i] == "gower") {
288 sumCalculators.push_back(new Gower());
289 }else if (Estimators[i] == "memchi2") {
290 sumCalculators.push_back(new MemChi2());
291 }else if (Estimators[i] == "memchord") {
292 sumCalculators.push_back(new MemChord());
293 }else if (Estimators[i] == "memeuclidean") {
294 sumCalculators.push_back(new MemEuclidean());
295 }else if (Estimators[i] == "mempearson") {
296 sumCalculators.push_back(new MemPearson());
297 }else if (Estimators[i] == "jsd") {
298 sumCalculators.push_back(new JSD());
299 }else if (Estimators[i] == "rjsd") {
300 sumCalculators.push_back(new RJSD());
309 catch(exception& e) {
310 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
314 //**********************************************************************************************************************
316 int SummarySharedCommand::execute(){
319 if (abort == true) { if (calledHelp) { return 0; } return 2; }
321 ofstream outputFileHandle, outAll;
322 map<string, string> variables;
323 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
324 string outputFileName = getOutputFileName("summary",variables);
326 //if the users entered no valid calculators don't execute command
327 if (sumCalculators.size() == 0) { return 0; }
328 //check if any calcs can do multiples
331 for (int i = 0; i < sumCalculators.size(); i++) {
332 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
337 input = new InputData(sharedfile, "sharedfile");
338 lookup = input->getSharedRAbundVectors();
339 string lastLabel = lookup[0]->getLabel();
341 /******************************************************/
342 //output headings for files
343 /******************************************************/
344 //output estimator names as column headers
345 m->openOutputFile(outputFileName, outputFileHandle);
346 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
347 for(int i=0;i<sumCalculators.size();i++){
348 outputFileHandle << '\t' << sumCalculators[i]->getName();
349 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
351 outputFileHandle << endl;
352 outputFileHandle.close();
354 //create file and put column headers for multiple groups file
355 variables["[tag]"]= "multiple";
356 string outAllFileName = getOutputFileName("summary",variables);
358 m->openOutputFile(outAllFileName, outAll);
359 outputNames.push_back(outAllFileName);
361 outAll << "label" <<'\t' << "comparison" << '\t';
362 for(int i=0;i<sumCalculators.size();i++){
363 if (sumCalculators[i]->getMultiple() == true) {
364 outAll << '\t' << sumCalculators[i]->getName();
371 if (lookup.size() < 2) {
372 m->mothurOut("I cannot run the command without at least 2 valid groups.");
373 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
375 //close files and clean up
376 m->mothurRemove(outputFileName);
377 if (mult == true) { m->mothurRemove(outAllFileName); }
379 //if you only have 2 groups you don't need a .sharedmultiple file
380 }else if ((lookup.size() == 2) && (mult == true)) {
382 m->mothurRemove(outAllFileName);
383 outputNames.pop_back();
386 if (m->control_pressed) {
387 if (mult) { m->mothurRemove(outAllFileName); }
388 m->mothurRemove(outputFileName);
390 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
391 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
395 /******************************************************/
397 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
398 subsampleSize = lookup[0]->getNumSeqs();
399 for (int i = 1; i < lookup.size(); i++) {
400 int thisSize = lookup[i]->getNumSeqs();
402 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
407 vector<SharedRAbundVector*> temp;
408 for (int i = 0; i < lookup.size(); i++) {
409 if (lookup[i]->getNumSeqs() < subsampleSize) {
410 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
413 Groups.push_back(lookup[i]->getGroup());
414 temp.push_back(lookup[i]);
418 m->setGroups(Groups);
421 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
425 /******************************************************/
426 //comparison breakup to be used by different processes later
427 numGroups = lookup.size();
428 lines.resize(processors);
429 for (int i = 0; i < processors; i++) {
430 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
431 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
433 /******************************************************/
435 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
436 set<string> processedLabels;
437 set<string> userLabels = labels;
439 //as long as you are not at the end of the file or done wih the lines you want
440 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
441 if (m->control_pressed) {
442 if (mult) { m->mothurRemove(outAllFileName); }
443 m->mothurRemove(outputFileName);
445 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
446 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
452 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
453 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
454 process(lookup, outputFileName, outAllFileName);
456 processedLabels.insert(lookup[0]->getLabel());
457 userLabels.erase(lookup[0]->getLabel());
460 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
461 string saveLabel = lookup[0]->getLabel();
463 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
464 lookup = input->getSharedRAbundVectors(lastLabel);
466 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
467 process(lookup, outputFileName, outAllFileName);
469 processedLabels.insert(lookup[0]->getLabel());
470 userLabels.erase(lookup[0]->getLabel());
472 //restore real lastlabel to save below
473 lookup[0]->setLabel(saveLabel);
476 lastLabel = lookup[0]->getLabel();
478 //get next line to process
479 //prevent memory leak
480 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
481 lookup = input->getSharedRAbundVectors();
484 if (m->control_pressed) {
485 if (mult) { m->mothurRemove(outAllFileName); }
486 m->mothurRemove(outputFileName);
488 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
493 //output error messages about any remaining user labels
494 set<string>::iterator it;
495 bool needToRun = false;
496 for (it = userLabels.begin(); it != userLabels.end(); it++) {
497 m->mothurOut("Your file does not include the label " + *it);
498 if (processedLabels.count(lastLabel) != 1) {
499 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
502 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
506 //run last label if you need to
507 if (needToRun == true) {
508 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
509 lookup = input->getSharedRAbundVectors(lastLabel);
511 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
512 process(lookup, outputFileName, outAllFileName);
513 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
517 //reset groups parameter
520 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
523 if (m->control_pressed) {
524 m->mothurRemove(outAllFileName);
525 m->mothurRemove(outputFileName);
529 m->mothurOutEndLine();
530 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531 m->mothurOut(outputFileName); m->mothurOutEndLine();
532 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
533 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
534 m->mothurOutEndLine();
538 catch(exception& e) {
539 m->errorOut(e, "SummarySharedCommand", "execute");
543 /***********************************************************/
544 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
547 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
550 out << simMatrix.size() << endl;
552 if (output == "lt") {
553 for (int b = 0; b < simMatrix.size(); b++) {
554 out << lookup[b]->getGroup() << '\t';
555 for (int n = 0; n < b; n++) {
556 if (m->control_pressed) { return 0; }
557 out << simMatrix[b][n] << '\t';
562 for (int b = 0; b < simMatrix.size(); m++) {
563 out << lookup[b]->getGroup() << '\t';
564 for (int n = 0; n < simMatrix[b].size(); n++) {
565 if (m->control_pressed) { return 0; }
566 out << simMatrix[b][n] << '\t';
574 catch(exception& e) {
575 m->errorOut(e, "SummarySharedCommand", "printSims");
579 /***********************************************************/
580 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
582 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
583 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
585 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
587 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
589 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
591 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
593 //make copy of lookup so we don't get access violations
594 vector<SharedRAbundVector*> newLookup;
595 for (int k = 0; k < thisItersLookup.size(); k++) {
596 SharedRAbundVector* temp = new SharedRAbundVector();
597 temp->setLabel(thisItersLookup[k]->getLabel());
598 temp->setGroup(thisItersLookup[k]->getGroup());
599 newLookup.push_back(temp);
603 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
604 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
605 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
608 tempLabels = sample.getSample(newLookup, subsampleSize);
609 thisItersLookup = newLookup;
614 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
615 m->appendFiles((sumFileName + ".temp"), sumFileName);
616 m->mothurRemove((sumFileName + ".temp"));
618 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
619 m->mothurRemove((sumAllFileName + ".temp"));
624 vector<int> processIDS;
626 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
627 //loop through and create all the processes you want
628 while (process != processors) {
632 processIDS.push_back(pid);
635 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
637 //only do this if you want a distance file
639 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
641 m->openOutputFile(tempdistFileName, outtemp);
643 for (int i = 0; i < calcDists.size(); i++) {
644 outtemp << calcDists[i].size() << endl;
646 for (int j = 0; j < calcDists[i].size(); j++) {
647 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
655 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
656 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
661 //parent do your part
662 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
663 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
664 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
665 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
667 //force parent to wait until all the processes are done
668 for (int i = 0; i < processIDS.size(); i++) {
669 int temp = processIDS[i];
673 for (int i = 0; i < processIDS.size(); i++) {
674 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
675 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
676 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
679 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
681 m->openInputFile(tempdistFileName, intemp);
683 for (int k = 0; k < calcDists.size(); k++) {
685 intemp >> size; m->gobble(intemp);
687 for (int j = 0; j < size; j++) {
692 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
694 seqDist tempDist(seq1, seq2, dist);
695 calcDists[k].push_back(tempDist);
699 m->mothurRemove(tempdistFileName);
703 //////////////////////////////////////////////////////////////////////////////////////////////////////
704 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
705 //Above fork() will clone, so memory is separate, but that's not the case with windows,
706 //Taking advantage of shared memory to pass results vectors.
707 //////////////////////////////////////////////////////////////////////////////////////////////////////
709 vector<summarySharedData*> pDataArray;
710 DWORD dwThreadIdArray[processors-1];
711 HANDLE hThreadArray[processors-1];
713 //Create processor worker threads.
714 for( int i=1; i<processors; i++ ){
716 //make copy of lookup so we don't get access violations
717 vector<SharedRAbundVector*> newLookup;
718 for (int k = 0; k < thisLookup.size(); k++) {
719 SharedRAbundVector* temp = new SharedRAbundVector();
720 temp->setLabel(thisLookup[k]->getLabel());
721 temp->setGroup(thisLookup[k]->getGroup());
722 newLookup.push_back(temp);
727 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
728 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
729 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
732 // Allocate memory for thread data.
733 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
734 pDataArray.push_back(tempSum);
735 processIDS.push_back(i);
737 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
740 //parent do your part
741 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
742 m->appendFiles((sumFileName + "0.temp"), sumFileName);
743 m->mothurRemove((sumFileName + "0.temp"));
744 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
746 //Wait until all threads have terminated.
747 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
749 //Close all thread handles and free memory allocations.
750 for(int i=0; i < pDataArray.size(); i++){
751 if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
752 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
754 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
755 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
757 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
760 for (int k = 0; k < calcDists.size(); k++) {
761 int size = pDataArray[i]->calcDists[k].size();
762 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
766 CloseHandle(hThreadArray[i]);
767 delete pDataArray[i];
773 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
775 calcDistsTotals.push_back(calcDists);
777 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
778 thisItersLookup.clear();
781 for (int i = 0; i < calcDists.size(); i++) {
782 if (m->control_pressed) { break; }
785 vector< vector<double> > matrix; //square matrix to represent the distance
786 matrix.resize(thisLookup.size());
787 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
789 for (int j = 0; j < calcDists[i].size(); j++) {
790 int row = calcDists[i][j].seq1;
791 int column = calcDists[i][j].seq2;
792 double dist = calcDists[i][j].dist;
794 matrix[row][column] = dist;
795 matrix[column][row] = dist;
798 map<string, string> variables;
799 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
800 variables["[calc]"] = sumCalculators[i]->getName();
801 variables["[distance]"] = thisLookup[0]->getLabel();
802 variables["[outputtag]"] = output;
803 variables["[tag2]"] = "";
804 string distFileName = getOutputFileName("phylip",variables);
805 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
807 m->openOutputFile(distFileName, outDist);
808 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
810 printSims(outDist, matrix);
816 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
820 //we need to find the average distance and standard deviation for each groups distance
821 vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
823 //find standard deviation
824 vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
827 for (int i = 0; i < calcDists.size(); i++) {
828 vector< vector<double> > matrix; //square matrix to represent the distance
829 matrix.resize(thisLookup.size());
830 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
832 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
833 stdmatrix.resize(thisLookup.size());
834 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
837 for (int j = 0; j < calcAverages[i].size(); j++) {
838 int row = calcAverages[i][j].seq1;
839 int column = calcAverages[i][j].seq2;
840 float dist = calcAverages[i][j].dist;
841 float stdDist = stdDev[i][j].dist;
843 matrix[row][column] = dist;
844 matrix[column][row] = dist;
845 stdmatrix[row][column] = stdDist;
846 stdmatrix[column][row] = stdDist;
849 map<string, string> variables;
850 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
851 variables["[calc]"] = sumCalculators[i]->getName();
852 variables["[distance]"] = thisLookup[0]->getLabel();
853 variables["[outputtag]"] = output;
854 variables["[tag2]"] = "ave";
855 string distFileName = getOutputFileName("phylip",variables);
856 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
858 m->openOutputFile(distFileName, outAve);
859 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
861 printSims(outAve, matrix);
865 variables["[tag2]"] = "std";
866 distFileName = getOutputFileName("phylip",variables);
867 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
869 m->openOutputFile(distFileName, outSTD);
870 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
872 printSims(outSTD, stdmatrix);
881 catch(exception& e) {
882 m->errorOut(e, "SummarySharedCommand", "process");
886 /**************************************************************************************************/
887 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
890 //loop through calculators and add to file all for all calcs that can do mutiple groups
893 m->openOutputFile(sumAllFile, outAll);
896 outAll << thisLookup[0]->getLabel() << '\t';
898 //output groups names
899 string outNames = "";
900 for (int j = 0; j < thisLookup.size(); j++) {
901 outNames += thisLookup[j]->getGroup() + "-";
903 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
904 outAll << outNames << '\t';
906 for(int i=0;i<sumCalculators.size();i++){
907 if (sumCalculators[i]->getMultiple() == true) {
908 sumCalculators[i]->getValues(thisLookup);
910 if (m->control_pressed) { outAll.close(); return 1; }
913 sumCalculators[i]->print(outAll);
920 ofstream outputFileHandle;
921 m->openOutputFile(sumFile, outputFileHandle);
923 vector<SharedRAbundVector*> subset;
924 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
926 for (int l = 0; l < k; l++) {
928 outputFileHandle << thisLookup[0]->getLabel() << '\t';
930 subset.clear(); //clear out old pair of sharedrabunds
931 //add new pair of sharedrabunds
932 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
934 //sort groups to be alphanumeric
935 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
936 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
938 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
941 for(int i=0;i<sumCalculators.size();i++) {
943 //if this calc needs all groups to calculate the pair load all groups
944 if (sumCalculators[i]->getNeedsAll()) {
945 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
946 for (int w = 0; w < thisLookup.size(); w++) {
947 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
951 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
953 if (m->control_pressed) { outputFileHandle.close(); return 1; }
955 outputFileHandle << '\t';
956 sumCalculators[i]->print(outputFileHandle);
958 seqDist temp(l, k, tempdata[0]);
959 calcDists[i].push_back(temp);
961 outputFileHandle << endl;
965 outputFileHandle.close();
969 catch(exception& e) {
970 m->errorOut(e, "SummarySharedCommand", "driver");
974 /**************************************************************************************************/