2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputFileNameTag(string type, string inputName=""){
69 string outputFileName = "";
70 map<string, vector<string> >::iterator it;
72 //is this a type this command creates
73 it = outputTypes.find(type);
74 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76 if (type == "summary") { outputFileName = "shared.summary"; }
77 else if (type == "phylip") { outputFileName = "dist"; }
78 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
80 return outputFileName;
83 m->errorOut(e, "SummarySharedCommand", "getOutputFileNameTag");
87 //**********************************************************************************************************************
88 SummarySharedCommand::SummarySharedCommand(){
90 abort = true; calledHelp = true;
92 vector<string> tempOutNames;
93 outputTypes["summary"] = tempOutNames;
94 outputTypes["phylip"] = tempOutNames;
97 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
101 //**********************************************************************************************************************
103 SummarySharedCommand::SummarySharedCommand(string option) {
105 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string, string> parameters = parser.getParameters();
117 map<string, string>::iterator it;
119 ValidParameters validParameter;
121 //check to make sure all parameters are valid for command
122 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["summary"] = tempOutNames;
129 outputTypes["phylip"] = tempOutNames;
131 //if the user changes the input directory command factory will send this info to us in the output parameter
132 string inputDir = validParameter.validFile(parameters, "inputdir", false);
133 if (inputDir == "not found"){ inputDir = ""; }
136 it = parameters.find("shared");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["shared"] = inputDir + it->second; }
146 sharedfile = validParameter.validFile(parameters, "shared", true);
147 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
148 else if (sharedfile == "not found") {
149 //if there is a current shared file, use it
150 sharedfile = m->getSharedFile();
151 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
152 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
153 }else { m->setSharedFile(sharedfile); }
156 //if the user changes the output directory command factory will send this info to us in the output parameter
157 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
160 //check for optional parameter and set defaults
161 // ...at some point should added some additional type checking...
162 label = validParameter.validFile(parameters, "label", false);
163 if (label == "not found") { label = ""; }
165 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
166 else { allLines = 1; }
170 calc = validParameter.validFile(parameters, "calc", false);
171 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
173 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
175 m->splitAtDash(calc, Estimators);
176 if (m->inUsersGroups("citation", Estimators)) {
177 ValidCalculators validCalc; validCalc.printCitations(Estimators);
178 //remove citation from list of calcs
179 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
182 groups = validParameter.validFile(parameters, "groups", false);
183 if (groups == "not found") { groups = ""; }
185 m->splitAtDash(groups, Groups);
186 m->setGroups(Groups);
189 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
190 all = m->isTrue(temp);
192 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
193 createPhylip = m->isTrue(temp);
195 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
196 m->mothurConvert(temp, iters);
198 output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
199 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
201 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
202 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
204 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
205 else { subsample = false; }
208 if (subsample == false) { iters = 1; }
210 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
211 m->setProcessors(temp);
212 m->mothurConvert(temp, processors);
214 if (abort == false) {
216 ValidCalculators validCalculator;
219 for (i=0; i<Estimators.size(); i++) {
220 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
221 if (Estimators[i] == "sharedsobs") {
222 sumCalculators.push_back(new SharedSobsCS());
223 }else if (Estimators[i] == "sharedchao") {
224 sumCalculators.push_back(new SharedChao1());
225 }else if (Estimators[i] == "sharedace") {
226 sumCalculators.push_back(new SharedAce());
227 }else if (Estimators[i] == "jabund") {
228 sumCalculators.push_back(new JAbund());
229 }else if (Estimators[i] == "sorabund") {
230 sumCalculators.push_back(new SorAbund());
231 }else if (Estimators[i] == "jclass") {
232 sumCalculators.push_back(new Jclass());
233 }else if (Estimators[i] == "sorclass") {
234 sumCalculators.push_back(new SorClass());
235 }else if (Estimators[i] == "jest") {
236 sumCalculators.push_back(new Jest());
237 }else if (Estimators[i] == "sorest") {
238 sumCalculators.push_back(new SorEst());
239 }else if (Estimators[i] == "thetayc") {
240 sumCalculators.push_back(new ThetaYC());
241 }else if (Estimators[i] == "thetan") {
242 sumCalculators.push_back(new ThetaN());
243 }else if (Estimators[i] == "kstest") {
244 sumCalculators.push_back(new KSTest());
245 }else if (Estimators[i] == "sharednseqs") {
246 sumCalculators.push_back(new SharedNSeqs());
247 }else if (Estimators[i] == "ochiai") {
248 sumCalculators.push_back(new Ochiai());
249 }else if (Estimators[i] == "anderberg") {
250 sumCalculators.push_back(new Anderberg());
251 }else if (Estimators[i] == "kulczynski") {
252 sumCalculators.push_back(new Kulczynski());
253 }else if (Estimators[i] == "kulczynskicody") {
254 sumCalculators.push_back(new KulczynskiCody());
255 }else if (Estimators[i] == "lennon") {
256 sumCalculators.push_back(new Lennon());
257 }else if (Estimators[i] == "morisitahorn") {
258 sumCalculators.push_back(new MorHorn());
259 }else if (Estimators[i] == "braycurtis") {
260 sumCalculators.push_back(new BrayCurtis());
261 }else if (Estimators[i] == "whittaker") {
262 sumCalculators.push_back(new Whittaker());
263 }else if (Estimators[i] == "odum") {
264 sumCalculators.push_back(new Odum());
265 }else if (Estimators[i] == "canberra") {
266 sumCalculators.push_back(new Canberra());
267 }else if (Estimators[i] == "structeuclidean") {
268 sumCalculators.push_back(new StructEuclidean());
269 }else if (Estimators[i] == "structchord") {
270 sumCalculators.push_back(new StructChord());
271 }else if (Estimators[i] == "hellinger") {
272 sumCalculators.push_back(new Hellinger());
273 }else if (Estimators[i] == "manhattan") {
274 sumCalculators.push_back(new Manhattan());
275 }else if (Estimators[i] == "structpearson") {
276 sumCalculators.push_back(new StructPearson());
277 }else if (Estimators[i] == "soergel") {
278 sumCalculators.push_back(new Soergel());
279 }else if (Estimators[i] == "spearman") {
280 sumCalculators.push_back(new Spearman());
281 }else if (Estimators[i] == "structkulczynski") {
282 sumCalculators.push_back(new StructKulczynski());
283 }else if (Estimators[i] == "speciesprofile") {
284 sumCalculators.push_back(new SpeciesProfile());
285 }else if (Estimators[i] == "hamming") {
286 sumCalculators.push_back(new Hamming());
287 }else if (Estimators[i] == "structchi2") {
288 sumCalculators.push_back(new StructChi2());
289 }else if (Estimators[i] == "gower") {
290 sumCalculators.push_back(new Gower());
291 }else if (Estimators[i] == "memchi2") {
292 sumCalculators.push_back(new MemChi2());
293 }else if (Estimators[i] == "memchord") {
294 sumCalculators.push_back(new MemChord());
295 }else if (Estimators[i] == "memeuclidean") {
296 sumCalculators.push_back(new MemEuclidean());
297 }else if (Estimators[i] == "mempearson") {
298 sumCalculators.push_back(new MemPearson());
307 catch(exception& e) {
308 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
312 //**********************************************************************************************************************
314 int SummarySharedCommand::execute(){
317 if (abort == true) { if (calledHelp) { return 0; } return 2; }
319 ofstream outputFileHandle, outAll;
320 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
322 //if the users entered no valid calculators don't execute command
323 if (sumCalculators.size() == 0) { return 0; }
324 //check if any calcs can do multiples
327 for (int i = 0; i < sumCalculators.size(); i++) {
328 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
333 input = new InputData(sharedfile, "sharedfile");
334 lookup = input->getSharedRAbundVectors();
335 string lastLabel = lookup[0]->getLabel();
337 /******************************************************/
338 //output headings for files
339 /******************************************************/
340 //output estimator names as column headers
341 m->openOutputFile(outputFileName, outputFileHandle);
342 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
343 for(int i=0;i<sumCalculators.size();i++){
344 outputFileHandle << '\t' << sumCalculators[i]->getName();
345 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
347 outputFileHandle << endl;
348 outputFileHandle.close();
350 //create file and put column headers for multiple groups file
351 string outAllFileName = ((m->getRootName(sharedfile)) + "multiple." + getOutputFileNameTag("summary"));
353 m->openOutputFile(outAllFileName, outAll);
354 outputNames.push_back(outAllFileName);
356 outAll << "label" <<'\t' << "comparison" << '\t';
357 for(int i=0;i<sumCalculators.size();i++){
358 if (sumCalculators[i]->getMultiple() == true) {
359 outAll << '\t' << sumCalculators[i]->getName();
366 if (lookup.size() < 2) {
367 m->mothurOut("I cannot run the command without at least 2 valid groups.");
368 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
370 //close files and clean up
371 m->mothurRemove(outputFileName);
372 if (mult == true) { m->mothurRemove(outAllFileName); }
374 //if you only have 2 groups you don't need a .sharedmultiple file
375 }else if ((lookup.size() == 2) && (mult == true)) {
377 m->mothurRemove(outAllFileName);
378 outputNames.pop_back();
381 if (m->control_pressed) {
382 if (mult) { m->mothurRemove(outAllFileName); }
383 m->mothurRemove(outputFileName);
385 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
390 /******************************************************/
392 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393 subsampleSize = lookup[0]->getNumSeqs();
394 for (int i = 1; i < lookup.size(); i++) {
395 int thisSize = lookup[i]->getNumSeqs();
397 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
402 vector<SharedRAbundVector*> temp;
403 for (int i = 0; i < lookup.size(); i++) {
404 if (lookup[i]->getNumSeqs() < subsampleSize) {
405 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
408 Groups.push_back(lookup[i]->getGroup());
409 temp.push_back(lookup[i]);
413 m->setGroups(Groups);
416 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
420 /******************************************************/
421 //comparison breakup to be used by different processes later
422 numGroups = lookup.size();
423 lines.resize(processors);
424 for (int i = 0; i < processors; i++) {
425 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
428 /******************************************************/
430 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431 set<string> processedLabels;
432 set<string> userLabels = labels;
434 //as long as you are not at the end of the file or done wih the lines you want
435 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436 if (m->control_pressed) {
437 if (mult) { m->mothurRemove(outAllFileName); }
438 m->mothurRemove(outputFileName);
440 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
447 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
448 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449 process(lookup, outputFileName, outAllFileName);
451 processedLabels.insert(lookup[0]->getLabel());
452 userLabels.erase(lookup[0]->getLabel());
455 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456 string saveLabel = lookup[0]->getLabel();
458 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
459 lookup = input->getSharedRAbundVectors(lastLabel);
461 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462 process(lookup, outputFileName, outAllFileName);
464 processedLabels.insert(lookup[0]->getLabel());
465 userLabels.erase(lookup[0]->getLabel());
467 //restore real lastlabel to save below
468 lookup[0]->setLabel(saveLabel);
471 lastLabel = lookup[0]->getLabel();
473 //get next line to process
474 //prevent memory leak
475 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
476 lookup = input->getSharedRAbundVectors();
479 if (m->control_pressed) {
480 if (mult) { m->mothurRemove(outAllFileName); }
481 m->mothurRemove(outputFileName);
483 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
488 //output error messages about any remaining user labels
489 set<string>::iterator it;
490 bool needToRun = false;
491 for (it = userLabels.begin(); it != userLabels.end(); it++) {
492 m->mothurOut("Your file does not include the label " + *it);
493 if (processedLabels.count(lastLabel) != 1) {
494 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
497 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
501 //run last label if you need to
502 if (needToRun == true) {
503 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
504 lookup = input->getSharedRAbundVectors(lastLabel);
506 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507 process(lookup, outputFileName, outAllFileName);
508 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
512 //reset groups parameter
515 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
518 if (m->control_pressed) {
519 m->mothurRemove(outAllFileName);
520 m->mothurRemove(outputFileName);
524 m->mothurOutEndLine();
525 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526 m->mothurOut(outputFileName); m->mothurOutEndLine();
527 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
528 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
529 m->mothurOutEndLine();
533 catch(exception& e) {
534 m->errorOut(e, "SummarySharedCommand", "execute");
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
542 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
545 out << simMatrix.size() << endl;
547 if (output == "lt") {
548 for (int b = 0; b < simMatrix.size(); b++) {
549 out << lookup[b]->getGroup() << '\t';
550 for (int n = 0; n < b; n++) {
551 if (m->control_pressed) { return 0; }
552 out << simMatrix[b][n] << '\t';
557 for (int b = 0; b < simMatrix.size(); m++) {
558 out << lookup[b]->getGroup() << '\t';
559 for (int n = 0; n < simMatrix[b].size(); n++) {
560 if (m->control_pressed) { return 0; }
561 out << simMatrix[b][n] << '\t';
569 catch(exception& e) {
570 m->errorOut(e, "SummarySharedCommand", "printSims");
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
577 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
580 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
582 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
584 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
586 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
588 //make copy of lookup so we don't get access violations
589 vector<SharedRAbundVector*> newLookup;
590 for (int k = 0; k < thisItersLookup.size(); k++) {
591 SharedRAbundVector* temp = new SharedRAbundVector();
592 temp->setLabel(thisItersLookup[k]->getLabel());
593 temp->setGroup(thisItersLookup[k]->getGroup());
594 newLookup.push_back(temp);
598 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
600 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
603 tempLabels = sample.getSample(newLookup, subsampleSize);
604 thisItersLookup = newLookup;
609 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610 m->appendFiles((sumFileName + ".temp"), sumFileName);
611 m->mothurRemove((sumFileName + ".temp"));
613 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614 m->mothurRemove((sumAllFileName + ".temp"));
619 vector<int> processIDS;
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622 //loop through and create all the processes you want
623 while (process != processors) {
627 processIDS.push_back(pid);
630 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
632 //only do this if you want a distance file
634 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
636 m->openOutputFile(tempdistFileName, outtemp);
638 for (int i = 0; i < calcDists.size(); i++) {
639 outtemp << calcDists[i].size() << endl;
641 for (int j = 0; j < calcDists[i].size(); j++) {
642 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
650 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
651 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
656 //parent do your part
657 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
658 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
662 //force parent to wait until all the processes are done
663 for (int i = 0; i < processIDS.size(); i++) {
664 int temp = processIDS[i];
668 for (int i = 0; i < processIDS.size(); i++) {
669 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
674 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
676 m->openInputFile(tempdistFileName, intemp);
678 for (int k = 0; k < calcDists.size(); k++) {
680 intemp >> size; m->gobble(intemp);
682 for (int j = 0; j < size; j++) {
687 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
689 seqDist tempDist(seq1, seq2, dist);
690 calcDists[k].push_back(tempDist);
694 m->mothurRemove(tempdistFileName);
698 //////////////////////////////////////////////////////////////////////////////////////////////////////
699 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
700 //Above fork() will clone, so memory is separate, but that's not the case with windows,
701 //Taking advantage of shared memory to pass results vectors.
702 //////////////////////////////////////////////////////////////////////////////////////////////////////
704 vector<summarySharedData*> pDataArray;
705 DWORD dwThreadIdArray[processors-1];
706 HANDLE hThreadArray[processors-1];
708 //Create processor worker threads.
709 for( int i=1; i<processors; i++ ){
711 //make copy of lookup so we don't get access violations
712 vector<SharedRAbundVector*> newLookup;
713 for (int k = 0; k < thisLookup.size(); k++) {
714 SharedRAbundVector* temp = new SharedRAbundVector();
715 temp->setLabel(thisLookup[k]->getLabel());
716 temp->setGroup(thisLookup[k]->getGroup());
717 newLookup.push_back(temp);
721 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
722 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
723 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
726 // Allocate memory for thread data.
727 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
728 pDataArray.push_back(tempSum);
729 processIDS.push_back(i);
731 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
734 //parent do your part
735 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
736 m->appendFiles((sumFileName + "0.temp"), sumFileName);
737 m->mothurRemove((sumFileName + "0.temp"));
738 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
740 //Wait until all threads have terminated.
741 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
743 //Close all thread handles and free memory allocations.
744 for(int i=0; i < pDataArray.size(); i++){
745 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
746 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
748 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
751 for (int k = 0; k < calcDists.size(); k++) {
752 int size = pDataArray[i]->calcDists[k].size();
753 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
757 CloseHandle(hThreadArray[i]);
758 delete pDataArray[i];
764 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
766 calcDistsTotals.push_back(calcDists);
768 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
769 thisItersLookup.clear();
772 for (int i = 0; i < calcDists.size(); i++) {
773 if (m->control_pressed) { break; }
776 vector< vector<double> > matrix; //square matrix to represent the distance
777 matrix.resize(thisLookup.size());
778 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
780 for (int j = 0; j < calcDists[i].size(); j++) {
781 int row = calcDists[i][j].seq1;
782 int column = calcDists[i][j].seq2;
783 double dist = calcDists[i][j].dist;
785 matrix[row][column] = dist;
786 matrix[column][row] = dist;
789 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + "." + getOutputFileNameTag("phylip");
790 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
792 m->openOutputFile(distFileName, outDist);
793 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
795 printSims(outDist, matrix);
801 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
805 //we need to find the average distance and standard deviation for each groups distance
807 vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
808 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
809 calcAverages[i].resize(calcDistsTotals[0][i].size());
811 for (int j = 0; j < calcAverages[i].size(); j++) {
812 calcAverages[i][j].seq1 = calcDists[i][j].seq1;
813 calcAverages[i][j].seq2 = calcDists[i][j].seq2;
814 calcAverages[i][j].dist = 0.0;
818 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
819 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
820 for (int j = 0; j < calcAverages[i].size(); j++) {
821 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
826 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
827 for (int j = 0; j < calcAverages[i].size(); j++) {
828 calcAverages[i][j].dist /= (float) iters;
832 //find standard deviation
833 vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
834 for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
835 stdDev[i].resize(calcDistsTotals[0][i].size());
837 for (int j = 0; j < stdDev[i].size(); j++) {
838 stdDev[i][j].seq1 = calcDists[i][j].seq1;
839 stdDev[i][j].seq2 = calcDists[i][j].seq2;
840 stdDev[i][j].dist = 0.0;
844 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
845 for (int i = 0; i < stdDev.size(); i++) {
846 for (int j = 0; j < stdDev[i].size(); j++) {
847 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
852 for (int i = 0; i < stdDev.size(); i++) { //finds average.
853 for (int j = 0; j < stdDev[i].size(); j++) {
854 stdDev[i][j].dist /= (float) iters;
855 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
860 for (int i = 0; i < calcDists.size(); i++) {
861 vector< vector<double> > matrix; //square matrix to represent the distance
862 matrix.resize(thisLookup.size());
863 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
865 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
866 stdmatrix.resize(thisLookup.size());
867 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
870 for (int j = 0; j < calcAverages[i].size(); j++) {
871 int row = calcAverages[i][j].seq1;
872 int column = calcAverages[i][j].seq2;
873 float dist = calcAverages[i][j].dist;
874 float stdDist = stdDev[i][j].dist;
876 matrix[row][column] = dist;
877 matrix[column][row] = dist;
878 stdmatrix[row][column] = stdDist;
879 stdmatrix[column][row] = stdDist;
882 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave." + getOutputFileNameTag("phylip");
883 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
885 m->openOutputFile(distFileName, outAve);
886 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
888 printSims(outAve, matrix);
892 distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std." + getOutputFileNameTag("phylip");
893 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
895 m->openOutputFile(distFileName, outSTD);
896 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
898 printSims(outSTD, stdmatrix);
907 catch(exception& e) {
908 m->errorOut(e, "SummarySharedCommand", "process");
912 /**************************************************************************************************/
913 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
916 //loop through calculators and add to file all for all calcs that can do mutiple groups
919 m->openOutputFile(sumAllFile, outAll);
922 outAll << thisLookup[0]->getLabel() << '\t';
924 //output groups names
925 string outNames = "";
926 for (int j = 0; j < thisLookup.size(); j++) {
927 outNames += thisLookup[j]->getGroup() + "-";
929 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
930 outAll << outNames << '\t';
932 for(int i=0;i<sumCalculators.size();i++){
933 if (sumCalculators[i]->getMultiple() == true) {
934 sumCalculators[i]->getValues(thisLookup);
936 if (m->control_pressed) { outAll.close(); return 1; }
939 sumCalculators[i]->print(outAll);
946 ofstream outputFileHandle;
947 m->openOutputFile(sumFile, outputFileHandle);
949 vector<SharedRAbundVector*> subset;
950 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
952 for (int l = 0; l < k; l++) {
954 outputFileHandle << thisLookup[0]->getLabel() << '\t';
956 subset.clear(); //clear out old pair of sharedrabunds
957 //add new pair of sharedrabunds
958 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
960 //sort groups to be alphanumeric
961 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
962 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
964 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
967 for(int i=0;i<sumCalculators.size();i++) {
969 //if this calc needs all groups to calculate the pair load all groups
970 if (sumCalculators[i]->getNeedsAll()) {
971 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
972 for (int w = 0; w < thisLookup.size(); w++) {
973 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
977 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
979 if (m->control_pressed) { outputFileHandle.close(); return 1; }
981 outputFileHandle << '\t';
982 sumCalculators[i]->print(outputFileHandle);
984 seqDist temp(l, k, tempdata[0]);
985 calcDists[i].push_back(temp);
987 outputFileHandle << endl;
991 outputFileHandle.close();
995 catch(exception& e) {
996 m->errorOut(e, "SummarySharedCommand", "driver");
1000 /**************************************************************************************************/