2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
17 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
18 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
19 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
20 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
21 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
22 CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "SummarySharedCommand", "setParameters");
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){
41 string helpString = "";
42 ValidCalculators validCalculator;
43 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all. shared is required if there is no current sharedfile.\n";
44 helpString += "The summary.shared command should be in the following format: \n";
45 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47 helpString += validCalculator.printCalc("sharedsummary");
48 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50 helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52 helpString += "The default value for groups is all the groups in your groupfile.\n";
53 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55 helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
62 m->errorOut(e, "SummarySharedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 SummarySharedCommand::SummarySharedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["summary"] = tempOutNames;
75 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
79 //**********************************************************************************************************************
81 SummarySharedCommand::SummarySharedCommand(string option) {
83 abort = false; calledHelp = false;
86 //allow user to run help
87 if(option == "help") { help(); abort = true; calledHelp = true; }
88 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91 vector<string> myArray = setParameters();
93 OptionParser parser(option);
94 map<string, string> parameters = parser.getParameters();
95 map<string, string>::iterator it;
97 ValidParameters validParameter;
99 //check to make sure all parameters are valid for command
100 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 //initialize outputTypes
105 vector<string> tempOutNames;
106 outputTypes["summary"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("shared");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["shared"] = inputDir + it->second; }
123 sharedfile = validParameter.validFile(parameters, "shared", true);
124 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
125 else if (sharedfile == "not found") {
126 //if there is a current shared file, use it
127 sharedfile = m->getSharedFile();
128 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
129 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
130 }else { m->setSharedFile(sharedfile); }
133 //if the user changes the output directory command factory will send this info to us in the output parameter
134 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
137 //check for optional parameter and set defaults
138 // ...at some point should added some additional type checking...
139 label = validParameter.validFile(parameters, "label", false);
140 if (label == "not found") { label = ""; }
142 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
143 else { allLines = 1; }
147 calc = validParameter.validFile(parameters, "calc", false);
148 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
150 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
152 m->splitAtDash(calc, Estimators);
153 if (m->inUsersGroups("citation", Estimators)) {
154 ValidCalculators validCalc; validCalc.printCitations(Estimators);
155 //remove citation from list of calcs
156 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
159 groups = validParameter.validFile(parameters, "groups", false);
160 if (groups == "not found") { groups = ""; }
162 m->splitAtDash(groups, Groups);
163 m->setGroups(Groups);
166 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
167 all = m->isTrue(temp);
169 temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
170 createPhylip = m->isTrue(temp);
172 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
173 m->mothurConvert(temp, iters);
175 output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
176 if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
178 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
179 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
181 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
182 else { subsample = false; }
185 if (subsample == false) { iters = 1; }
187 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
188 m->setProcessors(temp);
189 m->mothurConvert(temp, processors);
191 if (abort == false) {
193 ValidCalculators validCalculator;
196 for (i=0; i<Estimators.size(); i++) {
197 if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) {
198 if (Estimators[i] == "sharedsobs") {
199 sumCalculators.push_back(new SharedSobsCS());
200 }else if (Estimators[i] == "sharedchao") {
201 sumCalculators.push_back(new SharedChao1());
202 }else if (Estimators[i] == "sharedace") {
203 sumCalculators.push_back(new SharedAce());
204 }else if (Estimators[i] == "jabund") {
205 sumCalculators.push_back(new JAbund());
206 }else if (Estimators[i] == "sorabund") {
207 sumCalculators.push_back(new SorAbund());
208 }else if (Estimators[i] == "jclass") {
209 sumCalculators.push_back(new Jclass());
210 }else if (Estimators[i] == "sorclass") {
211 sumCalculators.push_back(new SorClass());
212 }else if (Estimators[i] == "jest") {
213 sumCalculators.push_back(new Jest());
214 }else if (Estimators[i] == "sorest") {
215 sumCalculators.push_back(new SorEst());
216 }else if (Estimators[i] == "thetayc") {
217 sumCalculators.push_back(new ThetaYC());
218 }else if (Estimators[i] == "thetan") {
219 sumCalculators.push_back(new ThetaN());
220 }else if (Estimators[i] == "kstest") {
221 sumCalculators.push_back(new KSTest());
222 }else if (Estimators[i] == "sharednseqs") {
223 sumCalculators.push_back(new SharedNSeqs());
224 }else if (Estimators[i] == "ochiai") {
225 sumCalculators.push_back(new Ochiai());
226 }else if (Estimators[i] == "anderberg") {
227 sumCalculators.push_back(new Anderberg());
228 }else if (Estimators[i] == "kulczynski") {
229 sumCalculators.push_back(new Kulczynski());
230 }else if (Estimators[i] == "kulczynskicody") {
231 sumCalculators.push_back(new KulczynskiCody());
232 }else if (Estimators[i] == "lennon") {
233 sumCalculators.push_back(new Lennon());
234 }else if (Estimators[i] == "morisitahorn") {
235 sumCalculators.push_back(new MorHorn());
236 }else if (Estimators[i] == "braycurtis") {
237 sumCalculators.push_back(new BrayCurtis());
238 }else if (Estimators[i] == "whittaker") {
239 sumCalculators.push_back(new Whittaker());
240 }else if (Estimators[i] == "odum") {
241 sumCalculators.push_back(new Odum());
242 }else if (Estimators[i] == "canberra") {
243 sumCalculators.push_back(new Canberra());
244 }else if (Estimators[i] == "structeuclidean") {
245 sumCalculators.push_back(new StructEuclidean());
246 }else if (Estimators[i] == "structchord") {
247 sumCalculators.push_back(new StructChord());
248 }else if (Estimators[i] == "hellinger") {
249 sumCalculators.push_back(new Hellinger());
250 }else if (Estimators[i] == "manhattan") {
251 sumCalculators.push_back(new Manhattan());
252 }else if (Estimators[i] == "structpearson") {
253 sumCalculators.push_back(new StructPearson());
254 }else if (Estimators[i] == "soergel") {
255 sumCalculators.push_back(new Soergel());
256 }else if (Estimators[i] == "spearman") {
257 sumCalculators.push_back(new Spearman());
258 }else if (Estimators[i] == "structkulczynski") {
259 sumCalculators.push_back(new StructKulczynski());
260 }else if (Estimators[i] == "speciesprofile") {
261 sumCalculators.push_back(new SpeciesProfile());
262 }else if (Estimators[i] == "hamming") {
263 sumCalculators.push_back(new Hamming());
264 }else if (Estimators[i] == "structchi2") {
265 sumCalculators.push_back(new StructChi2());
266 }else if (Estimators[i] == "gower") {
267 sumCalculators.push_back(new Gower());
268 }else if (Estimators[i] == "memchi2") {
269 sumCalculators.push_back(new MemChi2());
270 }else if (Estimators[i] == "memchord") {
271 sumCalculators.push_back(new MemChord());
272 }else if (Estimators[i] == "memeuclidean") {
273 sumCalculators.push_back(new MemEuclidean());
274 }else if (Estimators[i] == "mempearson") {
275 sumCalculators.push_back(new MemPearson());
284 catch(exception& e) {
285 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
289 //**********************************************************************************************************************
291 int SummarySharedCommand::execute(){
294 if (abort == true) { if (calledHelp) { return 0; } return 2; }
296 ofstream outputFileHandle, outAll;
297 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "shared.summary";
299 //if the users entered no valid calculators don't execute command
300 if (sumCalculators.size() == 0) { return 0; }
301 //check if any calcs can do multiples
304 for (int i = 0; i < sumCalculators.size(); i++) {
305 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
310 input = new InputData(sharedfile, "sharedfile");
311 lookup = input->getSharedRAbundVectors();
312 string lastLabel = lookup[0]->getLabel();
314 /******************************************************/
315 //output headings for files
316 /******************************************************/
317 //output estimator names as column headers
318 m->openOutputFile(outputFileName, outputFileHandle);
319 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
320 for(int i=0;i<sumCalculators.size();i++){
321 outputFileHandle << '\t' << sumCalculators[i]->getName();
322 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
324 outputFileHandle << endl;
325 outputFileHandle.close();
327 //create file and put column headers for multiple groups file
328 string outAllFileName = ((m->getRootName(sharedfile)) + "sharedmultiple.summary");
330 m->openOutputFile(outAllFileName, outAll);
331 outputNames.push_back(outAllFileName);
333 outAll << "label" <<'\t' << "comparison" << '\t';
334 for(int i=0;i<sumCalculators.size();i++){
335 if (sumCalculators[i]->getMultiple() == true) {
336 outAll << '\t' << sumCalculators[i]->getName();
343 if (lookup.size() < 2) {
344 m->mothurOut("I cannot run the command without at least 2 valid groups.");
345 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
347 //close files and clean up
348 m->mothurRemove(outputFileName);
349 if (mult == true) { m->mothurRemove(outAllFileName); }
351 //if you only have 2 groups you don't need a .sharedmultiple file
352 }else if ((lookup.size() == 2) && (mult == true)) {
354 m->mothurRemove(outAllFileName);
355 outputNames.pop_back();
358 if (m->control_pressed) {
359 if (mult) { m->mothurRemove(outAllFileName); }
360 m->mothurRemove(outputFileName);
362 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
363 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
367 /******************************************************/
370 /******************************************************/
371 //comparison breakup to be used by different processes later
372 numGroups = m->getNumGroups();
373 lines.resize(processors);
374 for (int i = 0; i < processors; i++) {
375 lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
376 lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
378 /******************************************************/
380 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
381 set<string> processedLabels;
382 set<string> userLabels = labels;
384 //as long as you are not at the end of the file or done wih the lines you want
385 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
386 if (m->control_pressed) {
387 if (mult) { m->mothurRemove(outAllFileName); }
388 m->mothurRemove(outputFileName);
390 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
391 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
397 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
398 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
399 process(lookup, outputFileName, outAllFileName);
401 processedLabels.insert(lookup[0]->getLabel());
402 userLabels.erase(lookup[0]->getLabel());
405 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
406 string saveLabel = lookup[0]->getLabel();
408 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
409 lookup = input->getSharedRAbundVectors(lastLabel);
411 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
412 process(lookup, outputFileName, outAllFileName);
414 processedLabels.insert(lookup[0]->getLabel());
415 userLabels.erase(lookup[0]->getLabel());
417 //restore real lastlabel to save below
418 lookup[0]->setLabel(saveLabel);
421 lastLabel = lookup[0]->getLabel();
423 //get next line to process
424 //prevent memory leak
425 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
426 lookup = input->getSharedRAbundVectors();
429 if (m->control_pressed) {
430 if (mult) { m->mothurRemove(outAllFileName); }
431 m->mothurRemove(outputFileName);
433 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
438 //output error messages about any remaining user labels
439 set<string>::iterator it;
440 bool needToRun = false;
441 for (it = userLabels.begin(); it != userLabels.end(); it++) {
442 m->mothurOut("Your file does not include the label " + *it);
443 if (processedLabels.count(lastLabel) != 1) {
444 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
447 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
451 //run last label if you need to
452 if (needToRun == true) {
453 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
454 lookup = input->getSharedRAbundVectors(lastLabel);
456 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
457 process(lookup, outputFileName, outAllFileName);
458 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
462 //reset groups parameter
465 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
468 if (m->control_pressed) {
469 m->mothurRemove(outAllFileName);
470 m->mothurRemove(outputFileName);
474 m->mothurOutEndLine();
475 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
476 m->mothurOut(outputFileName); m->mothurOutEndLine();
477 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); outputTypes["summary"].push_back(outAllFileName); }
478 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } outputTypes["summary"].push_back(outputFileName);
479 m->mothurOutEndLine();
483 catch(exception& e) {
484 m->errorOut(e, "SummarySharedCommand", "execute");
488 /***********************************************************/
489 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
492 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
495 out << simMatrix.size() << endl;
497 if (output == "lt") {
498 for (int b = 0; b < simMatrix.size(); b++) {
499 out << lookup[b]->getGroup() << '\t';
500 for (int n = 0; n < b; n++) {
501 if (m->control_pressed) { return 0; }
502 out << simMatrix[b][n] << '\t';
507 for (int b = 0; b < simMatrix.size(); m++) {
508 out << lookup[b]->getGroup() << '\t';
509 for (int n = 0; n < simMatrix[b].size(); n++) {
510 if (m->control_pressed) { return 0; }
511 out << simMatrix[b][n] << '\t';
519 catch(exception& e) {
520 m->errorOut(e, "SummarySharedCommand", "printSims");
524 /***********************************************************/
525 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
527 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
528 vector< vector<seqDist> > calcDists; calcDists.resize(sumCalculators.size());
530 for (int thisIter = 0; thisIter < iters; thisIter++) {
532 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
536 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
538 //make copy of lookup so we don't get access violations
539 vector<SharedRAbundVector*> newLookup;
540 for (int k = 0; k < thisItersLookup.size(); k++) {
541 SharedRAbundVector* temp = new SharedRAbundVector();
542 temp->setLabel(thisItersLookup[k]->getLabel());
543 temp->setGroup(thisItersLookup[k]->getGroup());
544 newLookup.push_back(temp);
548 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
549 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
550 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
553 tempLabels = sample.getSample(newLookup, subsampleSize);
554 thisItersLookup = newLookup;
559 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
560 m->appendFiles((sumFileName + ".temp"), sumFileName);
561 m->mothurRemove((sumFileName + ".temp"));
563 m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
564 m->mothurRemove((sumAllFileName + ".temp"));
569 vector<int> processIDS;
571 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
572 //loop through and create all the processes you want
573 while (process != processors) {
577 processIDS.push_back(pid);
580 driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
582 //only do this if you want a distance file
584 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
586 m->openOutputFile(tempdistFileName, outtemp);
588 for (int i = 0; i < calcDists.size(); i++) {
589 outtemp << calcDists[i].size() << endl;
591 for (int j = 0; j < calcDists[i].size(); j++) {
592 outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
600 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
601 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
606 //parent do your part
607 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
608 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
609 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
610 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
612 //force parent to wait until all the processes are done
613 for (int i = 0; i < processIDS.size(); i++) {
614 int temp = processIDS[i];
618 for (int i = 0; i < processIDS.size(); i++) {
619 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
620 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
621 if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
624 string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
626 m->openInputFile(tempdistFileName, intemp);
628 for (int k = 0; k < calcDists.size(); k++) {
630 intemp >> size; m->gobble(intemp);
632 for (int j = 0; j < size; j++) {
637 intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
639 seqDist tempDist(seq1, seq2, dist);
640 calcDists[k].push_back(tempDist);
644 m->mothurRemove(tempdistFileName);
648 //////////////////////////////////////////////////////////////////////////////////////////////////////
649 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct.
650 //Above fork() will clone, so memory is separate, but that's not the case with windows,
651 //Taking advantage of shared memory to pass results vectors.
652 //////////////////////////////////////////////////////////////////////////////////////////////////////
654 vector<summarySharedData*> pDataArray;
655 DWORD dwThreadIdArray[processors-1];
656 HANDLE hThreadArray[processors-1];
658 //Create processor worker threads.
659 for( int i=1; i<processors; i++ ){
661 //make copy of lookup so we don't get access violations
662 vector<SharedRAbundVector*> newLookup;
663 for (int k = 0; k < thisLookup.size(); k++) {
664 SharedRAbundVector* temp = new SharedRAbundVector();
665 temp->setLabel(thisLookup[k]->getLabel());
666 temp->setGroup(thisLookup[k]->getGroup());
667 newLookup.push_back(temp);
671 for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
672 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
673 for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
676 // Allocate memory for thread data.
677 summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
678 pDataArray.push_back(tempSum);
679 processIDS.push_back(i);
681 hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
684 //parent do your part
685 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
686 m->appendFiles((sumFileName + "0.temp"), sumFileName);
687 m->mothurRemove((sumFileName + "0.temp"));
688 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
690 //Wait until all threads have terminated.
691 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
693 //Close all thread handles and free memory allocations.
694 for(int i=0; i < pDataArray.size(); i++){
695 m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
696 m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
698 for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
701 for (int k = 0; k < calcDists.size(); k++) {
702 int size = pDataArray[i]->calcDists[k].size();
703 for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
707 CloseHandle(hThreadArray[i]);
708 delete pDataArray[i];
713 calcDistsTotals.push_back(calcDists);
718 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
719 thisItersLookup.clear();
720 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
725 //we need to find the average distance and standard deviation for each groups distance
727 vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
728 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
729 calcAverages[i].resize(calcDistsTotals[0][i].size());
731 for (int j = 0; j < calcAverages[i].size(); j++) {
732 calcAverages[i][j].seq1 = calcDists[i][j].seq1;
733 calcAverages[i][j].seq2 = calcDists[i][j].seq2;
734 calcAverages[i][j].dist = 0.0;
738 for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
739 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
740 for (int j = 0; j < calcAverages[i].size(); j++) {
741 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
746 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
747 for (int j = 0; j < calcAverages[i].size(); j++) {
748 calcAverages[i][j].dist /= (float) iters;
752 //find standard deviation
753 vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
754 for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
755 stdDev[i].resize(calcDistsTotals[0][i].size());
757 for (int j = 0; j < stdDev[i].size(); j++) {
758 stdDev[i][j].seq1 = calcDists[i][j].seq1;
759 stdDev[i][j].seq2 = calcDists[i][j].seq2;
760 stdDev[i][j].dist = 0.0;
764 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
765 for (int i = 0; i < stdDev.size(); i++) {
766 for (int j = 0; j < stdDev[i].size(); j++) {
767 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
772 for (int i = 0; i < stdDev.size(); i++) { //finds average.
773 for (int j = 0; j < stdDev[i].size(); j++) {
774 stdDev[i][j].dist /= (float) iters;
775 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
780 for (int i = 0; i < calcDists.size(); i++) {
781 vector< vector<double> > matrix; //square matrix to represent the distance
782 matrix.resize(thisLookup.size());
783 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
785 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
786 stdmatrix.resize(thisLookup.size());
787 for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); }
790 for (int j = 0; j < calcAverages[i].size(); j++) {
791 int row = calcAverages[i][j].seq1;
792 int column = calcAverages[i][j].seq2;
793 float dist = calcAverages[i][j].dist;
794 float stdDist = stdDev[i][j].dist;
796 matrix[row][column] = dist;
797 matrix[column][row] = dist;
798 stdmatrix[row][column] = stdDist;
799 stdmatrix[column][row] = stdDist;
802 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave.dist";
803 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
805 m->openOutputFile(distFileName, outAve);
806 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
808 printSims(outAve, matrix);
812 distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std.dist";
813 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
815 m->openOutputFile(distFileName, outSTD);
816 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
818 printSims(outSTD, stdmatrix);
825 for (int i = 0; i < calcDists.size(); i++) {
826 if (m->control_pressed) { break; }
829 vector< vector<double> > matrix; //square matrix to represent the distance
830 matrix.resize(thisLookup.size());
831 for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
833 for (int j = 0; j < calcDists[i].size(); j++) {
834 int row = calcDists[i][j].seq1;
835 int column = calcDists[i][j].seq2;
836 double dist = calcDists[i][j].dist;
838 matrix[row][column] = dist;
839 matrix[column][row] = dist;
842 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist";
843 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
845 m->openOutputFile(distFileName, outDist);
846 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
848 printSims(outDist, matrix);
857 catch(exception& e) {
858 m->errorOut(e, "SummarySharedCommand", "process");
862 /**************************************************************************************************/
863 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
866 //loop through calculators and add to file all for all calcs that can do mutiple groups
869 m->openOutputFile(sumAllFile, outAll);
872 outAll << thisLookup[0]->getLabel() << '\t';
874 //output groups names
875 string outNames = "";
876 for (int j = 0; j < thisLookup.size(); j++) {
877 outNames += thisLookup[j]->getGroup() + "-";
879 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
880 outAll << outNames << '\t';
882 for(int i=0;i<sumCalculators.size();i++){
883 if (sumCalculators[i]->getMultiple() == true) {
884 sumCalculators[i]->getValues(thisLookup);
886 if (m->control_pressed) { outAll.close(); return 1; }
889 sumCalculators[i]->print(outAll);
896 ofstream outputFileHandle;
897 m->openOutputFile(sumFile, outputFileHandle);
899 vector<SharedRAbundVector*> subset;
900 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
902 for (int l = 0; l < k; l++) {
904 outputFileHandle << thisLookup[0]->getLabel() << '\t';
906 subset.clear(); //clear out old pair of sharedrabunds
907 //add new pair of sharedrabunds
908 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
910 //sort groups to be alphanumeric
911 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
912 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
914 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
917 for(int i=0;i<sumCalculators.size();i++) {
919 //if this calc needs all groups to calculate the pair load all groups
920 if (sumCalculators[i]->getNeedsAll()) {
921 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
922 for (int w = 0; w < thisLookup.size(); w++) {
923 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
927 vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
929 if (m->control_pressed) { outputFileHandle.close(); return 1; }
931 outputFileHandle << '\t';
932 sumCalculators[i]->print(outputFileHandle);
934 seqDist temp(l, k, tempdata[0]);
935 calcDists[i].push_back(temp);
937 outputFileHandle << endl;
941 outputFileHandle.close();
945 catch(exception& e) {
946 m->errorOut(e, "SummarySharedCommand", "driver");
950 /**************************************************************************************************/