2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
71 if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; }
72 else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["summary"] = tempOutNames;
89 outputTypes["phylip"] = tempOutNames;
92 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
96 //**********************************************************************************************************************
98 SummarySharedCommand::SummarySharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();
110 OptionParser parser(option);
111 map<string, string> parameters = parser.getParameters();
112 map<string, string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["summary"] = tempOutNames;
124 outputTypes["phylip"] = tempOutNames;
126 //if the user changes the input directory command factory will send this info to us in the output parameter
127 string inputDir = validParameter.validFile(parameters, "inputdir", false);
128 if (inputDir == "not found"){ inputDir = ""; }
131 it = parameters.find("shared");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 sharedfile = validParameter.validFile(parameters, "shared", true);
142 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
143 else if (sharedfile == "not found") {
144 //if there is a current shared file, use it
145 sharedfile = m->getSharedFile();
146 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setSharedFile(sharedfile); }
151 //if the user changes the output directory command factory will send this info to us in the output parameter
152 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
165 calc = validParameter.validFile(parameters, "calc", false);
166 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
168 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
170 m->splitAtDash(calc, Estimators);
171 if (m->inUsersGroups("citation", Estimators)) {
172 ValidCalculators validCalc; validCalc.printCitations(Estimators);
173 //remove citation from list of calcs
174 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
177 groups = validParameter.validFile(parameters, "groups", false);
178 if (groups == "not found") { groups = ""; }
180 m->splitAtDash(groups, Groups);
181 m->setGroups(Groups);
184 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
185 all = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
188 m->mothurConvert(temp, iters);
190 output = validParameter.validFile(parameters, "output", false);
191 if(output == "not found"){ output = "lt"; }
192 else { createPhylip = true; }
193 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
195 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
196 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
199 else { subsample = false; }
202 if (subsample == false) { iters = 0; }
204 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
205 createPhylip = m->isTrue(temp);
206 if (subsample) { createPhylip = true; }
208 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
209 m->setProcessors(temp);
210 m->mothurConvert(temp, processors);
212 if (abort == false) {
214 ValidCalculators validCalculator;
217 for (i=0; i<Estimators.size(); i++) {
218 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
219 if (Estimators[i] == "sharedsobs") {
220 sumCalculators.push_back(new SharedSobsCS());
221 }else if (Estimators[i] == "sharedchao") {
222 sumCalculators.push_back(new SharedChao1());
223 }else if (Estimators[i] == "sharedace") {
224 sumCalculators.push_back(new SharedAce());
225 }else if (Estimators[i] == "jabund") {
226 sumCalculators.push_back(new JAbund());
227 }else if (Estimators[i] == "sorabund") {
228 sumCalculators.push_back(new SorAbund());
229 }else if (Estimators[i] == "jclass") {
230 sumCalculators.push_back(new Jclass());
231 }else if (Estimators[i] == "sorclass") {
232 sumCalculators.push_back(new SorClass());
233 }else if (Estimators[i] == "jest") {
234 sumCalculators.push_back(new Jest());
235 }else if (Estimators[i] == "sorest") {
236 sumCalculators.push_back(new SorEst());
237 }else if (Estimators[i] == "thetayc") {
238 sumCalculators.push_back(new ThetaYC());
239 }else if (Estimators[i] == "thetan") {
240 sumCalculators.push_back(new ThetaN());
241 }else if (Estimators[i] == "kstest") {
242 sumCalculators.push_back(new KSTest());
243 }else if (Estimators[i] == "sharednseqs") {
244 sumCalculators.push_back(new SharedNSeqs());
245 }else if (Estimators[i] == "ochiai") {
246 sumCalculators.push_back(new Ochiai());
247 }else if (Estimators[i] == "anderberg") {
248 sumCalculators.push_back(new Anderberg());
249 }else if (Estimators[i] == "kulczynski") {
250 sumCalculators.push_back(new Kulczynski());
251 }else if (Estimators[i] == "kulczynskicody") {
252 sumCalculators.push_back(new KulczynskiCody());
253 }else if (Estimators[i] == "lennon") {
254 sumCalculators.push_back(new Lennon());
255 }else if (Estimators[i] == "morisitahorn") {
256 sumCalculators.push_back(new MorHorn());
257 }else if (Estimators[i] == "braycurtis") {
258 sumCalculators.push_back(new BrayCurtis());
259 }else if (Estimators[i] == "whittaker") {
260 sumCalculators.push_back(new Whittaker());
261 }else if (Estimators[i] == "odum") {
262 sumCalculators.push_back(new Odum());
263 }else if (Estimators[i] == "canberra") {
264 sumCalculators.push_back(new Canberra());
265 }else if (Estimators[i] == "structeuclidean") {
266 sumCalculators.push_back(new StructEuclidean());
267 }else if (Estimators[i] == "structchord") {
268 sumCalculators.push_back(new StructChord());
269 }else if (Estimators[i] == "hellinger") {
270 sumCalculators.push_back(new Hellinger());
271 }else if (Estimators[i] == "manhattan") {
272 sumCalculators.push_back(new Manhattan());
273 }else if (Estimators[i] == "structpearson") {
274 sumCalculators.push_back(new StructPearson());
275 }else if (Estimators[i] == "soergel") {
276 sumCalculators.push_back(new Soergel());
277 }else if (Estimators[i] == "spearman") {
278 sumCalculators.push_back(new Spearman());
279 }else if (Estimators[i] == "structkulczynski") {
280 sumCalculators.push_back(new StructKulczynski());
281 }else if (Estimators[i] == "speciesprofile") {
282 sumCalculators.push_back(new SpeciesProfile());
283 }else if (Estimators[i] == "hamming") {
284 sumCalculators.push_back(new Hamming());
285 }else if (Estimators[i] == "structchi2") {
286 sumCalculators.push_back(new StructChi2());
287 }else if (Estimators[i] == "gower") {
288 sumCalculators.push_back(new Gower());
289 }else if (Estimators[i] == "memchi2") {
290 sumCalculators.push_back(new MemChi2());
291 }else if (Estimators[i] == "memchord") {
292 sumCalculators.push_back(new MemChord());
293 }else if (Estimators[i] == "memeuclidean") {
294 sumCalculators.push_back(new MemEuclidean());
295 }else if (Estimators[i] == "mempearson") {
296 sumCalculators.push_back(new MemPearson());
297 }else if (Estimators[i] == "jsd") {
298 sumCalculators.push_back(new JSD());
307 catch(exception& e) {
308 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
312 //**********************************************************************************************************************
314 int SummarySharedCommand::execute(){
317 if (abort == true) { if (calledHelp) { return 0; } return 2; }
319 ofstream outputFileHandle, outAll;
320 map<string, string> variables;
321 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
322 string outputFileName = getOutputFileName("summary",variables);
324 //if the users entered no valid calculators don't execute command
325 if (sumCalculators.size() == 0) { return 0; }
326 //check if any calcs can do multiples
329 for (int i = 0; i < sumCalculators.size(); i++) {
330 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
335 input = new InputData(sharedfile, "sharedfile");
336 lookup = input->getSharedRAbundVectors();
337 string lastLabel = lookup[0]->getLabel();
339 /******************************************************/
340 //output headings for files
341 /******************************************************/
342 //output estimator names as column headers
343 m->openOutputFile(outputFileName, outputFileHandle);
344 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
345 for(int i=0;i<sumCalculators.size();i++){
346 outputFileHandle << '\t' << sumCalculators[i]->getName();
347 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
349 outputFileHandle << endl;
350 outputFileHandle.close();
352 //create file and put column headers for multiple groups file
353 variables["[tag]"]= "multiple";
354 string outAllFileName = getOutputFileName("summary",variables);
356 m->openOutputFile(outAllFileName, outAll);
357 outputNames.push_back(outAllFileName);
359 outAll << "label" <<'\t' << "comparison" << '\t';
360 for(int i=0;i<sumCalculators.size();i++){
361 if (sumCalculators[i]->getMultiple() == true) {
362 outAll << '\t' << sumCalculators[i]->getName();
369 if (lookup.size() < 2) {
370 m->mothurOut("I cannot run the command without at least 2 valid groups.");
371 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
373 //close files and clean up
374 m->mothurRemove(outputFileName);
375 if (mult == true) { m->mothurRemove(outAllFileName); }
377 //if you only have 2 groups you don't need a .sharedmultiple file
378 }else if ((lookup.size() == 2) && (mult == true)) {
380 m->mothurRemove(outAllFileName);
381 outputNames.pop_back();
384 if (m->control_pressed) {
385 if (mult) { m->mothurRemove(outAllFileName); }
386 m->mothurRemove(outputFileName);
388 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
389 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
393 /******************************************************/
395 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
396 subsampleSize = lookup[0]->getNumSeqs();
397 for (int i = 1; i < lookup.size(); i++) {
398 int thisSize = lookup[i]->getNumSeqs();
400 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
405 vector<SharedRAbundVector*> temp;
406 for (int i = 0; i < lookup.size(); i++) {
407 if (lookup[i]->getNumSeqs() < subsampleSize) {
408 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
411 Groups.push_back(lookup[i]->getGroup());
412 temp.push_back(lookup[i]);
416 m->setGroups(Groups);
419 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
423 /******************************************************/
424 //comparison breakup to be used by different processes later
425 numGroups = lookup.size();
426 lines.resize(processors);
427 for (int i = 0; i < processors; i++) {
428 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
429 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
431 /******************************************************/
433 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
434 set<string> processedLabels;
435 set<string> userLabels = labels;
437 //as long as you are not at the end of the file or done wih the lines you want
438 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
439 if (m->control_pressed) {
440 if (mult) { m->mothurRemove(outAllFileName); }
441 m->mothurRemove(outputFileName);
443 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
444 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
450 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
451 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
452 process(lookup, outputFileName, outAllFileName);
454 processedLabels.insert(lookup[0]->getLabel());
455 userLabels.erase(lookup[0]->getLabel());
458 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
459 string saveLabel = lookup[0]->getLabel();
461 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
462 lookup = input->getSharedRAbundVectors(lastLabel);
464 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
465 process(lookup, outputFileName, outAllFileName);
467 processedLabels.insert(lookup[0]->getLabel());
468 userLabels.erase(lookup[0]->getLabel());
470 //restore real lastlabel to save below
471 lookup[0]->setLabel(saveLabel);
474 lastLabel = lookup[0]->getLabel();
476 //get next line to process
477 //prevent memory leak
478 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
479 lookup = input->getSharedRAbundVectors();
482 if (m->control_pressed) {
483 if (mult) { m->mothurRemove(outAllFileName); }
484 m->mothurRemove(outputFileName);
486 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
491 //output error messages about any remaining user labels
492 set<string>::iterator it;
493 bool needToRun = false;
494 for (it = userLabels.begin(); it != userLabels.end(); it++) {
495 m->mothurOut("Your file does not include the label " + *it);
496 if (processedLabels.count(lastLabel) != 1) {
497 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
500 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
504 //run last label if you need to
505 if (needToRun == true) {
506 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
507 lookup = input->getSharedRAbundVectors(lastLabel);
509 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
510 process(lookup, outputFileName, outAllFileName);
511 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
515 //reset groups parameter
518 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
521 if (m->control_pressed) {
522 m->mothurRemove(outAllFileName);
523 m->mothurRemove(outputFileName);
527 m->mothurOutEndLine();
528 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
529 m->mothurOut(outputFileName); m->mothurOutEndLine();
530 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
531 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
532 m->mothurOutEndLine();
536 catch(exception& e) {
537 m->errorOut(e, "SummarySharedCommand", "execute");
541 /***********************************************************/
542 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
545 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
548 out << simMatrix.size() << endl;
550 if (output == "lt") {
551 for (int b = 0; b < simMatrix.size(); b++) {
552 out << lookup[b]->getGroup() << '\t';
553 for (int n = 0; n < b; n++) {
554 if (m->control_pressed) { return 0; }
555 out << simMatrix[b][n] << '\t';
560 for (int b = 0; b < simMatrix.size(); m++) {
561 out << lookup[b]->getGroup() << '\t';
562 for (int n = 0; n < simMatrix[b].size(); n++) {
563 if (m->control_pressed) { return 0; }
564 out << simMatrix[b][n] << '\t';
572 catch(exception& e) {
573 m->errorOut(e, "SummarySharedCommand", "printSims");
577 /***********************************************************/
578 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
580 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
581 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
583 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
585 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
587 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
589 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
591 //make copy of lookup so we don't get access violations
592 vector<SharedRAbundVector*> newLookup;
593 for (int k = 0; k < thisItersLookup.size(); k++) {
594 SharedRAbundVector* temp = new SharedRAbundVector();
595 temp->setLabel(thisItersLookup[k]->getLabel());
596 temp->setGroup(thisItersLookup[k]->getGroup());
597 newLookup.push_back(temp);
601 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
602 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
603 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
606 tempLabels = sample.getSample(newLookup, subsampleSize);
607 thisItersLookup = newLookup;
612 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
613 m->appendFiles((sumFileName + ".temp"), sumFileName);
614 m->mothurRemove((sumFileName + ".temp"));
616 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
617 m->mothurRemove((sumAllFileName + ".temp"));
622 vector<int> processIDS;
624 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
625 //loop through and create all the processes you want
626 while (process != processors) {
630 processIDS.push_back(pid);
633 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
635 //only do this if you want a distance file
637 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
639 m->openOutputFile(tempdistFileName, outtemp);
641 for (int i = 0; i < calcDists.size(); i++) {
642 outtemp << calcDists[i].size() << endl;
644 for (int j = 0; j < calcDists[i].size(); j++) {
645 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
653 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
654 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
659 //parent do your part
660 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
661 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
662 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
663 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
665 //force parent to wait until all the processes are done
666 for (int i = 0; i < processIDS.size(); i++) {
667 int temp = processIDS[i];
671 for (int i = 0; i < processIDS.size(); i++) {
672 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
673 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
674 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
677 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
679 m->openInputFile(tempdistFileName, intemp);
681 for (int k = 0; k < calcDists.size(); k++) {
683 intemp >> size; m->gobble(intemp);
685 for (int j = 0; j < size; j++) {
690 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
692 seqDist tempDist(seq1, seq2, dist);
693 calcDists[k].push_back(tempDist);
697 m->mothurRemove(tempdistFileName);
701 //////////////////////////////////////////////////////////////////////////////////////////////////////
702 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
703 //Above fork() will clone, so memory is separate, but that's not the case with windows,
704 //Taking advantage of shared memory to pass results vectors.
705 //////////////////////////////////////////////////////////////////////////////////////////////////////
707 vector<summarySharedData*> pDataArray;
708 DWORD dwThreadIdArray[processors-1];
709 HANDLE hThreadArray[processors-1];
711 //Create processor worker threads.
712 for( int i=1; i<processors; i++ ){
714 //make copy of lookup so we don't get access violations
715 vector<SharedRAbundVector*> newLookup;
716 for (int k = 0; k < thisLookup.size(); k++) {
717 SharedRAbundVector* temp = new SharedRAbundVector();
718 temp->setLabel(thisLookup[k]->getLabel());
719 temp->setGroup(thisLookup[k]->getGroup());
720 newLookup.push_back(temp);
725 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
726 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
727 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
730 // Allocate memory for thread data.
731 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
732 pDataArray.push_back(tempSum);
733 processIDS.push_back(i);
735 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
738 //parent do your part
739 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
740 m->appendFiles((sumFileName + "0.temp"), sumFileName);
741 m->mothurRemove((sumFileName + "0.temp"));
742 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
744 //Wait until all threads have terminated.
745 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
747 //Close all thread handles and free memory allocations.
748 for(int i=0; i < pDataArray.size(); i++){
749 if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
750 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
752 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
753 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
755 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
758 for (int k = 0; k < calcDists.size(); k++) {
759 int size = pDataArray[i]->calcDists[k].size();
760 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
764 CloseHandle(hThreadArray[i]);
765 delete pDataArray[i];
771 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
773 calcDistsTotals.push_back(calcDists);
775 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
776 thisItersLookup.clear();
779 for (int i = 0; i < calcDists.size(); i++) {
780 if (m->control_pressed) { break; }
783 vector< vector<double> > matrix; //square matrix to represent the distance
784 matrix.resize(thisLookup.size());
785 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
787 for (int j = 0; j < calcDists[i].size(); j++) {
788 int row = calcDists[i][j].seq1;
789 int column = calcDists[i][j].seq2;
790 double dist = calcDists[i][j].dist;
792 matrix[row][column] = dist;
793 matrix[column][row] = dist;
796 map<string, string> variables;
797 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
798 variables["[calc]"] = sumCalculators[i]->getName();
799 variables["[distance]"] = thisLookup[0]->getLabel();
800 variables["[outputtag]"] = output;
801 variables["[tag2]"] = "";
802 string distFileName = getOutputFileName("phylip",variables);
803 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
805 m->openOutputFile(distFileName, outDist);
806 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
808 printSims(outDist, matrix);
814 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
818 //we need to find the average distance and standard deviation for each groups distance
819 vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
821 //find standard deviation
822 vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
825 for (int i = 0; i < calcDists.size(); i++) {
826 vector< vector<double> > matrix; //square matrix to represent the distance
827 matrix.resize(thisLookup.size());
828 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
830 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
831 stdmatrix.resize(thisLookup.size());
832 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
835 for (int j = 0; j < calcAverages[i].size(); j++) {
836 int row = calcAverages[i][j].seq1;
837 int column = calcAverages[i][j].seq2;
838 float dist = calcAverages[i][j].dist;
839 float stdDist = stdDev[i][j].dist;
841 matrix[row][column] = dist;
842 matrix[column][row] = dist;
843 stdmatrix[row][column] = stdDist;
844 stdmatrix[column][row] = stdDist;
847 map<string, string> variables;
848 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
849 variables["[calc]"] = sumCalculators[i]->getName();
850 variables["[distance]"] = thisLookup[0]->getLabel();
851 variables["[outputtag]"] = output;
852 variables["[tag2]"] = "ave";
853 string distFileName = getOutputFileName("phylip",variables);
854 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
856 m->openOutputFile(distFileName, outAve);
857 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
859 printSims(outAve, matrix);
863 variables["[tag2]"] = "std";
864 distFileName = getOutputFileName("phylip",variables);
865 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
867 m->openOutputFile(distFileName, outSTD);
868 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
870 printSims(outSTD, stdmatrix);
879 catch(exception& e) {
880 m->errorOut(e, "SummarySharedCommand", "process");
884 /**************************************************************************************************/
885 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
888 //loop through calculators and add to file all for all calcs that can do mutiple groups
891 m->openOutputFile(sumAllFile, outAll);
894 outAll << thisLookup[0]->getLabel() << '\t';
896 //output groups names
897 string outNames = "";
898 for (int j = 0; j < thisLookup.size(); j++) {
899 outNames += thisLookup[j]->getGroup() + "-";
901 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
902 outAll << outNames << '\t';
904 for(int i=0;i<sumCalculators.size();i++){
905 if (sumCalculators[i]->getMultiple() == true) {
906 sumCalculators[i]->getValues(thisLookup);
908 if (m->control_pressed) { outAll.close(); return 1; }
911 sumCalculators[i]->print(outAll);
918 ofstream outputFileHandle;
919 m->openOutputFile(sumFile, outputFileHandle);
921 vector<SharedRAbundVector*> subset;
922 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
924 for (int l = 0; l < k; l++) {
926 outputFileHandle << thisLookup[0]->getLabel() << '\t';
928 subset.clear(); //clear out old pair of sharedrabunds
929 //add new pair of sharedrabunds
930 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
932 //sort groups to be alphanumeric
933 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
934 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
936 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
939 for(int i=0;i<sumCalculators.size();i++) {
941 //if this calc needs all groups to calculate the pair load all groups
942 if (sumCalculators[i]->getNeedsAll()) {
943 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
944 for (int w = 0; w < thisLookup.size(); w++) {
945 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
949 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
951 if (m->control_pressed) { outputFileHandle.close(); return 1; }
953 outputFileHandle << '\t';
954 sumCalculators[i]->print(outputFileHandle);
956 seqDist temp(l, k, tempdata[0]);
957 calcDists[i].push_back(temp);
959 outputFileHandle << endl;
963 outputFileHandle.close();
967 catch(exception& e) {
968 m->errorOut(e, "SummarySharedCommand", "driver");
972 /**************************************************************************************************/