2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
71 if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; }
72 else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["summary"] = tempOutNames;
89 outputTypes["phylip"] = tempOutNames;
92 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
96 //**********************************************************************************************************************
98 SummarySharedCommand::SummarySharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108 vector<string> myArray = setParameters();
110 OptionParser parser(option);
111 map<string, string> parameters = parser.getParameters();
112 map<string, string>::iterator it;
114 ValidParameters validParameter;
116 //check to make sure all parameters are valid for command
117 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
118 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
121 //initialize outputTypes
122 vector<string> tempOutNames;
123 outputTypes["summary"] = tempOutNames;
124 outputTypes["phylip"] = tempOutNames;
126 //if the user changes the input directory command factory will send this info to us in the output parameter
127 string inputDir = validParameter.validFile(parameters, "inputdir", false);
128 if (inputDir == "not found"){ inputDir = ""; }
131 it = parameters.find("shared");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["shared"] = inputDir + it->second; }
141 sharedfile = validParameter.validFile(parameters, "shared", true);
142 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
143 else if (sharedfile == "not found") {
144 //if there is a current shared file, use it
145 sharedfile = m->getSharedFile();
146 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setSharedFile(sharedfile); }
151 //if the user changes the output directory command factory will send this info to us in the output parameter
152 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
155 //check for optional parameter and set defaults
156 // ...at some point should added some additional type checking...
157 label = validParameter.validFile(parameters, "label", false);
158 if (label == "not found") { label = ""; }
160 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
161 else { allLines = 1; }
165 calc = validParameter.validFile(parameters, "calc", false);
166 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
168 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
170 m->splitAtDash(calc, Estimators);
171 if (m->inUsersGroups("citation", Estimators)) {
172 ValidCalculators validCalc; validCalc.printCitations(Estimators);
173 //remove citation from list of calcs
174 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
177 groups = validParameter.validFile(parameters, "groups", false);
178 if (groups == "not found") { groups = ""; }
180 m->splitAtDash(groups, Groups);
181 m->setGroups(Groups);
184 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
185 all = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
188 createPhylip = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
191 m->mothurConvert(temp, iters);
193 output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
194 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
196 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
197 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
199 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
200 else { subsample = false; }
203 if (subsample == false) { iters = 0; }
205 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
206 m->setProcessors(temp);
207 m->mothurConvert(temp, processors);
209 if (abort == false) {
211 ValidCalculators validCalculator;
214 for (i=0; i<Estimators.size(); i++) {
215 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
216 if (Estimators[i] == "sharedsobs") {
217 sumCalculators.push_back(new SharedSobsCS());
218 }else if (Estimators[i] == "sharedchao") {
219 sumCalculators.push_back(new SharedChao1());
220 }else if (Estimators[i] == "sharedace") {
221 sumCalculators.push_back(new SharedAce());
222 }else if (Estimators[i] == "jabund") {
223 sumCalculators.push_back(new JAbund());
224 }else if (Estimators[i] == "sorabund") {
225 sumCalculators.push_back(new SorAbund());
226 }else if (Estimators[i] == "jclass") {
227 sumCalculators.push_back(new Jclass());
228 }else if (Estimators[i] == "sorclass") {
229 sumCalculators.push_back(new SorClass());
230 }else if (Estimators[i] == "jest") {
231 sumCalculators.push_back(new Jest());
232 }else if (Estimators[i] == "sorest") {
233 sumCalculators.push_back(new SorEst());
234 }else if (Estimators[i] == "thetayc") {
235 sumCalculators.push_back(new ThetaYC());
236 }else if (Estimators[i] == "thetan") {
237 sumCalculators.push_back(new ThetaN());
238 }else if (Estimators[i] == "kstest") {
239 sumCalculators.push_back(new KSTest());
240 }else if (Estimators[i] == "sharednseqs") {
241 sumCalculators.push_back(new SharedNSeqs());
242 }else if (Estimators[i] == "ochiai") {
243 sumCalculators.push_back(new Ochiai());
244 }else if (Estimators[i] == "anderberg") {
245 sumCalculators.push_back(new Anderberg());
246 }else if (Estimators[i] == "kulczynski") {
247 sumCalculators.push_back(new Kulczynski());
248 }else if (Estimators[i] == "kulczynskicody") {
249 sumCalculators.push_back(new KulczynskiCody());
250 }else if (Estimators[i] == "lennon") {
251 sumCalculators.push_back(new Lennon());
252 }else if (Estimators[i] == "morisitahorn") {
253 sumCalculators.push_back(new MorHorn());
254 }else if (Estimators[i] == "braycurtis") {
255 sumCalculators.push_back(new BrayCurtis());
256 }else if (Estimators[i] == "whittaker") {
257 sumCalculators.push_back(new Whittaker());
258 }else if (Estimators[i] == "odum") {
259 sumCalculators.push_back(new Odum());
260 }else if (Estimators[i] == "canberra") {
261 sumCalculators.push_back(new Canberra());
262 }else if (Estimators[i] == "structeuclidean") {
263 sumCalculators.push_back(new StructEuclidean());
264 }else if (Estimators[i] == "structchord") {
265 sumCalculators.push_back(new StructChord());
266 }else if (Estimators[i] == "hellinger") {
267 sumCalculators.push_back(new Hellinger());
268 }else if (Estimators[i] == "manhattan") {
269 sumCalculators.push_back(new Manhattan());
270 }else if (Estimators[i] == "structpearson") {
271 sumCalculators.push_back(new StructPearson());
272 }else if (Estimators[i] == "soergel") {
273 sumCalculators.push_back(new Soergel());
274 }else if (Estimators[i] == "spearman") {
275 sumCalculators.push_back(new Spearman());
276 }else if (Estimators[i] == "structkulczynski") {
277 sumCalculators.push_back(new StructKulczynski());
278 }else if (Estimators[i] == "speciesprofile") {
279 sumCalculators.push_back(new SpeciesProfile());
280 }else if (Estimators[i] == "hamming") {
281 sumCalculators.push_back(new Hamming());
282 }else if (Estimators[i] == "structchi2") {
283 sumCalculators.push_back(new StructChi2());
284 }else if (Estimators[i] == "gower") {
285 sumCalculators.push_back(new Gower());
286 }else if (Estimators[i] == "memchi2") {
287 sumCalculators.push_back(new MemChi2());
288 }else if (Estimators[i] == "memchord") {
289 sumCalculators.push_back(new MemChord());
290 }else if (Estimators[i] == "memeuclidean") {
291 sumCalculators.push_back(new MemEuclidean());
292 }else if (Estimators[i] == "mempearson") {
293 sumCalculators.push_back(new MemPearson());
302 catch(exception& e) {
303 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
307 //**********************************************************************************************************************
309 int SummarySharedCommand::execute(){
312 if (abort == true) { if (calledHelp) { return 0; } return 2; }
314 ofstream outputFileHandle, outAll;
315 map<string, string> variables;
316 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
317 string outputFileName = getOutputFileName("summary",variables);
319 //if the users entered no valid calculators don't execute command
320 if (sumCalculators.size() == 0) { return 0; }
321 //check if any calcs can do multiples
324 for (int i = 0; i < sumCalculators.size(); i++) {
325 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
330 input = new InputData(sharedfile, "sharedfile");
331 lookup = input->getSharedRAbundVectors();
332 string lastLabel = lookup[0]->getLabel();
334 /******************************************************/
335 //output headings for files
336 /******************************************************/
337 //output estimator names as column headers
338 m->openOutputFile(outputFileName, outputFileHandle);
339 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
340 for(int i=0;i<sumCalculators.size();i++){
341 outputFileHandle << '\t' << sumCalculators[i]->getName();
342 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
344 outputFileHandle << endl;
345 outputFileHandle.close();
347 //create file and put column headers for multiple groups file
348 variables["[tag]"]= "multiple";
349 string outAllFileName = getOutputFileName("summary",variables);
351 m->openOutputFile(outAllFileName, outAll);
352 outputNames.push_back(outAllFileName);
354 outAll << "label" <<'\t' << "comparison" << '\t';
355 for(int i=0;i<sumCalculators.size();i++){
356 if (sumCalculators[i]->getMultiple() == true) {
357 outAll << '\t' << sumCalculators[i]->getName();
364 if (lookup.size() < 2) {
365 m->mothurOut("I cannot run the command without at least 2 valid groups.");
366 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
368 //close files and clean up
369 m->mothurRemove(outputFileName);
370 if (mult == true) { m->mothurRemove(outAllFileName); }
372 //if you only have 2 groups you don't need a .sharedmultiple file
373 }else if ((lookup.size() == 2) && (mult == true)) {
375 m->mothurRemove(outAllFileName);
376 outputNames.pop_back();
379 if (m->control_pressed) {
380 if (mult) { m->mothurRemove(outAllFileName); }
381 m->mothurRemove(outputFileName);
383 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
384 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
388 /******************************************************/
390 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
391 subsampleSize = lookup[0]->getNumSeqs();
392 for (int i = 1; i < lookup.size(); i++) {
393 int thisSize = lookup[i]->getNumSeqs();
395 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
400 vector<SharedRAbundVector*> temp;
401 for (int i = 0; i < lookup.size(); i++) {
402 if (lookup[i]->getNumSeqs() < subsampleSize) {
403 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
406 Groups.push_back(lookup[i]->getGroup());
407 temp.push_back(lookup[i]);
411 m->setGroups(Groups);
414 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
418 /******************************************************/
419 //comparison breakup to be used by different processes later
420 numGroups = lookup.size();
421 lines.resize(processors);
422 for (int i = 0; i < processors; i++) {
423 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
424 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
426 /******************************************************/
428 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
429 set<string> processedLabels;
430 set<string> userLabels = labels;
432 //as long as you are not at the end of the file or done wih the lines you want
433 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
434 if (m->control_pressed) {
435 if (mult) { m->mothurRemove(outAllFileName); }
436 m->mothurRemove(outputFileName);
438 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
439 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
445 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
446 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
447 process(lookup, outputFileName, outAllFileName);
449 processedLabels.insert(lookup[0]->getLabel());
450 userLabels.erase(lookup[0]->getLabel());
453 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
454 string saveLabel = lookup[0]->getLabel();
456 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
457 lookup = input->getSharedRAbundVectors(lastLabel);
459 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
460 process(lookup, outputFileName, outAllFileName);
462 processedLabels.insert(lookup[0]->getLabel());
463 userLabels.erase(lookup[0]->getLabel());
465 //restore real lastlabel to save below
466 lookup[0]->setLabel(saveLabel);
469 lastLabel = lookup[0]->getLabel();
471 //get next line to process
472 //prevent memory leak
473 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
474 lookup = input->getSharedRAbundVectors();
477 if (m->control_pressed) {
478 if (mult) { m->mothurRemove(outAllFileName); }
479 m->mothurRemove(outputFileName);
481 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
486 //output error messages about any remaining user labels
487 set<string>::iterator it;
488 bool needToRun = false;
489 for (it = userLabels.begin(); it != userLabels.end(); it++) {
490 m->mothurOut("Your file does not include the label " + *it);
491 if (processedLabels.count(lastLabel) != 1) {
492 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
495 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
499 //run last label if you need to
500 if (needToRun == true) {
501 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
502 lookup = input->getSharedRAbundVectors(lastLabel);
504 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
505 process(lookup, outputFileName, outAllFileName);
506 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
510 //reset groups parameter
513 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
516 if (m->control_pressed) {
517 m->mothurRemove(outAllFileName);
518 m->mothurRemove(outputFileName);
522 m->mothurOutEndLine();
523 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
524 m->mothurOut(outputFileName); m->mothurOutEndLine();
525 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
526 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
527 m->mothurOutEndLine();
531 catch(exception& e) {
532 m->errorOut(e, "SummarySharedCommand", "execute");
536 /***********************************************************/
537 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
540 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
543 out << simMatrix.size() << endl;
545 if (output == "lt") {
546 for (int b = 0; b < simMatrix.size(); b++) {
547 out << lookup[b]->getGroup() << '\t';
548 for (int n = 0; n < b; n++) {
549 if (m->control_pressed) { return 0; }
550 out << simMatrix[b][n] << '\t';
555 for (int b = 0; b < simMatrix.size(); m++) {
556 out << lookup[b]->getGroup() << '\t';
557 for (int n = 0; n < simMatrix[b].size(); n++) {
558 if (m->control_pressed) { return 0; }
559 out << simMatrix[b][n] << '\t';
567 catch(exception& e) {
568 m->errorOut(e, "SummarySharedCommand", "printSims");
572 /***********************************************************/
573 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
575 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
576 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
578 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
580 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
582 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
584 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
586 //make copy of lookup so we don't get access violations
587 vector<SharedRAbundVector*> newLookup;
588 for (int k = 0; k < thisItersLookup.size(); k++) {
589 SharedRAbundVector* temp = new SharedRAbundVector();
590 temp->setLabel(thisItersLookup[k]->getLabel());
591 temp->setGroup(thisItersLookup[k]->getGroup());
592 newLookup.push_back(temp);
596 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
597 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
598 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
601 tempLabels = sample.getSample(newLookup, subsampleSize);
602 thisItersLookup = newLookup;
607 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
608 m->appendFiles((sumFileName + ".temp"), sumFileName);
609 m->mothurRemove((sumFileName + ".temp"));
611 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
612 m->mothurRemove((sumAllFileName + ".temp"));
617 vector<int> processIDS;
619 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
620 //loop through and create all the processes you want
621 while (process != processors) {
625 processIDS.push_back(pid);
628 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
630 //only do this if you want a distance file
632 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
634 m->openOutputFile(tempdistFileName, outtemp);
636 for (int i = 0; i < calcDists.size(); i++) {
637 outtemp << calcDists[i].size() << endl;
639 for (int j = 0; j < calcDists[i].size(); j++) {
640 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
648 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
649 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
654 //parent do your part
655 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
656 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
657 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
658 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
660 //force parent to wait until all the processes are done
661 for (int i = 0; i < processIDS.size(); i++) {
662 int temp = processIDS[i];
666 for (int i = 0; i < processIDS.size(); i++) {
667 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
668 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
669 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
672 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
674 m->openInputFile(tempdistFileName, intemp);
676 for (int k = 0; k < calcDists.size(); k++) {
678 intemp >> size; m->gobble(intemp);
680 for (int j = 0; j < size; j++) {
685 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
687 seqDist tempDist(seq1, seq2, dist);
688 calcDists[k].push_back(tempDist);
692 m->mothurRemove(tempdistFileName);
696 //////////////////////////////////////////////////////////////////////////////////////////////////////
697 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
698 //Above fork() will clone, so memory is separate, but that's not the case with windows,
699 //Taking advantage of shared memory to pass results vectors.
700 //////////////////////////////////////////////////////////////////////////////////////////////////////
702 vector<summarySharedData*> pDataArray;
703 DWORD dwThreadIdArray[processors-1];
704 HANDLE hThreadArray[processors-1];
706 //Create processor worker threads.
707 for( int i=1; i<processors; i++ ){
709 //make copy of lookup so we don't get access violations
710 vector<SharedRAbundVector*> newLookup;
711 for (int k = 0; k < thisLookup.size(); k++) {
712 SharedRAbundVector* temp = new SharedRAbundVector();
713 temp->setLabel(thisLookup[k]->getLabel());
714 temp->setGroup(thisLookup[k]->getGroup());
715 newLookup.push_back(temp);
719 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
720 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
721 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
724 // Allocate memory for thread data.
725 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
726 pDataArray.push_back(tempSum);
727 processIDS.push_back(i);
729 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
732 //parent do your part
733 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
734 m->appendFiles((sumFileName + "0.temp"), sumFileName);
735 m->mothurRemove((sumFileName + "0.temp"));
736 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
738 //Wait until all threads have terminated.
739 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
741 //Close all thread handles and free memory allocations.
742 for(int i=0; i < pDataArray.size(); i++){
743 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
744 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
746 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
749 for (int k = 0; k < calcDists.size(); k++) {
750 int size = pDataArray[i]->calcDists[k].size();
751 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
755 CloseHandle(hThreadArray[i]);
756 delete pDataArray[i];
762 if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
764 calcDistsTotals.push_back(calcDists);
766 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
767 thisItersLookup.clear();
770 for (int i = 0; i < calcDists.size(); i++) {
771 if (m->control_pressed) { break; }
774 vector< vector<double> > matrix; //square matrix to represent the distance
775 matrix.resize(thisLookup.size());
776 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
778 for (int j = 0; j < calcDists[i].size(); j++) {
779 int row = calcDists[i][j].seq1;
780 int column = calcDists[i][j].seq2;
781 double dist = calcDists[i][j].dist;
783 matrix[row][column] = dist;
784 matrix[column][row] = dist;
787 map<string, string> variables;
788 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
789 variables["[calc]"] = sumCalculators[i]->getName();
790 variables["[distance]"] = thisLookup[0]->getLabel();
791 variables["[outputtag]"] = output;
792 string distFileName = getOutputFileName("phylip",variables);
793 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
795 m->openOutputFile(distFileName, outDist);
796 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
798 printSims(outDist, matrix);
804 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
808 //we need to find the average distance and standard deviation for each groups distance
810 vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
811 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
812 calcAverages[i].resize(calcDistsTotals[0][i].size());
814 for (int j = 0; j < calcAverages[i].size(); j++) {
815 calcAverages[i][j].seq1 = calcDists[i][j].seq1;
816 calcAverages[i][j].seq2 = calcDists[i][j].seq2;
817 calcAverages[i][j].dist = 0.0;
821 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
822 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
823 for (int j = 0; j < calcAverages[i].size(); j++) {
824 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
829 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
830 for (int j = 0; j < calcAverages[i].size(); j++) {
831 calcAverages[i][j].dist /= (float) iters;
835 //find standard deviation
836 vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
837 for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
838 stdDev[i].resize(calcDistsTotals[0][i].size());
840 for (int j = 0; j < stdDev[i].size(); j++) {
841 stdDev[i][j].seq1 = calcDists[i][j].seq1;
842 stdDev[i][j].seq2 = calcDists[i][j].seq2;
843 stdDev[i][j].dist = 0.0;
847 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
848 for (int i = 0; i < stdDev.size(); i++) {
849 for (int j = 0; j < stdDev[i].size(); j++) {
850 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
855 for (int i = 0; i < stdDev.size(); i++) { //finds average.
856 for (int j = 0; j < stdDev[i].size(); j++) {
857 stdDev[i][j].dist /= (float) iters;
858 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
863 for (int i = 0; i < calcDists.size(); i++) {
864 vector< vector<double> > matrix; //square matrix to represent the distance
865 matrix.resize(thisLookup.size());
866 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
868 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
869 stdmatrix.resize(thisLookup.size());
870 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
873 for (int j = 0; j < calcAverages[i].size(); j++) {
874 int row = calcAverages[i][j].seq1;
875 int column = calcAverages[i][j].seq2;
876 float dist = calcAverages[i][j].dist;
877 float stdDist = stdDev[i][j].dist;
879 matrix[row][column] = dist;
880 matrix[column][row] = dist;
881 stdmatrix[row][column] = stdDist;
882 stdmatrix[column][row] = stdDist;
885 map<string, string> variables;
886 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
887 variables["[calc]"] = sumCalculators[i]->getName();
888 variables["[distance]"] = thisLookup[0]->getLabel();
889 variables["[outputtag]"] = output;
890 variables["[tag2]"] = "ave";
891 string distFileName = getOutputFileName("phylip",variables);
892 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
894 m->openOutputFile(distFileName, outAve);
895 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
897 printSims(outAve, matrix);
901 variables["[tag2]"] = "std";
902 distFileName = getOutputFileName("phylip",variables);
903 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
905 m->openOutputFile(distFileName, outSTD);
906 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
908 printSims(outSTD, stdmatrix);
917 catch(exception& e) {
918 m->errorOut(e, "SummarySharedCommand", "process");
922 /**************************************************************************************************/
923 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
926 //loop through calculators and add to file all for all calcs that can do mutiple groups
929 m->openOutputFile(sumAllFile, outAll);
932 outAll << thisLookup[0]->getLabel() << '\t';
934 //output groups names
935 string outNames = "";
936 for (int j = 0; j < thisLookup.size(); j++) {
937 outNames += thisLookup[j]->getGroup() + "-";
939 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
940 outAll << outNames << '\t';
942 for(int i=0;i<sumCalculators.size();i++){
943 if (sumCalculators[i]->getMultiple() == true) {
944 sumCalculators[i]->getValues(thisLookup);
946 if (m->control_pressed) { outAll.close(); return 1; }
949 sumCalculators[i]->print(outAll);
956 ofstream outputFileHandle;
957 m->openOutputFile(sumFile, outputFileHandle);
959 vector<SharedRAbundVector*> subset;
960 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
962 for (int l = 0; l < k; l++) {
964 outputFileHandle << thisLookup[0]->getLabel() << '\t';
966 subset.clear(); //clear out old pair of sharedrabunds
967 //add new pair of sharedrabunds
968 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
970 //sort groups to be alphanumeric
971 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
972 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
974 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
977 for(int i=0;i<sumCalculators.size();i++) {
979 //if this calc needs all groups to calculate the pair load all groups
980 if (sumCalculators[i]->getNeedsAll()) {
981 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
982 for (int w = 0; w < thisLookup.size(); w++) {
983 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
987 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
989 if (m->control_pressed) { outputFileHandle.close(); return 1; }
991 outputFileHandle << '\t';
992 sumCalculators[i]->print(outputFileHandle);
994 seqDist temp(l, k, tempdata[0]);
995 calcDists[i].push_back(temp);
997 outputFileHandle << endl;
1001 outputFileHandle.close();
1005 catch(exception& e) {
1006 m->errorOut(e, "SummarySharedCommand", "driver");
1010 /**************************************************************************************************/