*/
#include "summarysharedcommand.h"
-#include "sharedsobscollectsummary.h"
-#include "sharedchao1.h"
-#include "sharedace.h"
-#include "sharednseqs.h"
-#include "sharedjabund.h"
-#include "sharedsorabund.h"
-#include "sharedjclass.h"
-#include "sharedsorclass.h"
-#include "sharedjest.h"
-#include "sharedsorest.h"
-#include "sharedthetayc.h"
-#include "sharedthetan.h"
-#include "sharedkstest.h"
-#include "whittaker.h"
-#include "sharedochiai.h"
-#include "sharedanderbergs.h"
-#include "sharedkulczynski.h"
-#include "sharedkulczynskicody.h"
-#include "sharedlennon.h"
-#include "sharedmorisitahorn.h"
-#include "sharedbraycurtis.h"
-#include "sharedjackknife.h"
-#include "whittaker.h"
-#include "odum.h"
-#include "canberra.h"
-#include "structeuclidean.h"
-#include "structchord.h"
-#include "hellinger.h"
-#include "manhattan.h"
-#include "structpearson.h"
-#include "soergel.h"
-#include "spearman.h"
-#include "structkulczynski.h"
-#include "structchi2.h"
-#include "speciesprofile.h"
-#include "hamming.h"
-#include "gower.h"
-#include "memchi2.h"
-#include "memchord.h"
-#include "memeuclidean.h"
-#include "mempearson.h"
//**********************************************************************************************************************
vector<string> SummarySharedCommand::setParameters(){
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
- CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
- helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n";
+ helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
return helpString;
}
catch(exception& e) {
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
vector<string> myArray = setParameters();
sharedfile = m->getSharedFile();
if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
- }
+ }else { m->setSharedFile(sharedfile); }
//if the user changes the output directory command factory will send this info to us in the output parameter
if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
}
m->splitAtDash(calc, Estimators);
+ if (m->inUsersGroups("citation", Estimators)) {
+ ValidCalculators validCalc; validCalc.printCitations(Estimators);
+ //remove citation from list of calcs
+ for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
+ }
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
else {
m->splitAtDash(groups, Groups);
- m->Groups = Groups;
+ m->setGroups(Groups);
}
string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- convert(temp, processors);
+ m->mothurConvert(temp, processors);
if (abort == false) {
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
//close files and clean up
- remove(outputFileName.c_str());
- if (mult == true) { remove(outAllFileName.c_str()); }
+ m->mothurRemove(outputFileName);
+ if (mult == true) { m->mothurRemove(outAllFileName); }
return 0;
//if you only have 2 groups you don't need a .sharedmultiple file
}else if ((lookup.size() == 2) && (mult == true)) {
mult = false;
- remove(outAllFileName.c_str());
+ m->mothurRemove(outAllFileName);
outputNames.pop_back();
}
if (m->control_pressed) {
- if (mult) { remove(outAllFileName.c_str()); }
- remove(outputFileName.c_str());
+ if (mult) { m->mothurRemove(outAllFileName); }
+ m->mothurRemove(outputFileName);
delete input;
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
- m->Groups.clear();
+ m->clearGroups();
return 0;
}
/******************************************************/
/******************************************************/
//comparison breakup to be used by different processes later
- numGroups = m->Groups.size();
+ numGroups = m->getNumGroups();
lines.resize(processors);
for (int i = 0; i < processors; i++) {
lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
- if (mult) { remove(outAllFileName.c_str()); }
- remove(outputFileName.c_str());
+ if (mult) { m->mothurRemove(outAllFileName); }
+ m->mothurRemove(outputFileName);
delete input;
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
- m->Groups.clear();
+ m->clearGroups();
return 0;
}
}
if (m->control_pressed) {
- if (mult) { remove(outAllFileName.c_str()); }
- remove(outputFileName.c_str());
+ if (mult) { m->mothurRemove(outAllFileName); }
+ m->mothurRemove(outputFileName);
delete input;
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
- m->Groups.clear();
+ m->clearGroups();
return 0;
}
//reset groups parameter
- m->Groups.clear();
+ m->clearGroups();
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
delete input;
if (m->control_pressed) {
- remove(outAllFileName.c_str());
- remove(outputFileName.c_str());
+ m->mothurRemove(outAllFileName);
+ m->mothurRemove(outputFileName);
return 0;
}
/***********************************************************/
int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
try {
- vector< vector<seqDist> > calcDists; //vector containing vectors that contains the summary results for each group compare
- calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
- driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
- m->appendFiles((sumFileName + ".temp"), sumFileName);
- remove((sumFileName + ".temp").c_str());
- if (mult) {
- m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
- remove((sumAllFileName + ".temp").c_str());
- }
- }else{
- int process = 1;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
- driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
-
- //only do this if you want a distance file
- if (createPhylip) {
- string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
- ofstream outtemp;
- m->openOutputFile(tempdistFileName, outtemp);
-
- for (int i = 0; i < calcDists.size(); i++) {
- outtemp << calcDists[i].size() << endl;
-
- for (int j = 0; j < calcDists[i].size(); j++) {
- outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
- }
- }
- outtemp.close();
- }
-
- exit(0);
- }else {
- m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
- for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
- exit(0);
- }
- }
-
- //parent do your part
- driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
- m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
- remove((sumFileName + toString(getpid()) + ".temp").c_str());
- if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
-
- //force parent to wait until all the processes are done
- for (int i = 0; i < processIDS.size(); i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- for (int i = 0; i < processIDS.size(); i++) {
- m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
- remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
- if (mult) { remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str()); }
-
- if (createPhylip) {
- string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
- ifstream intemp;
- m->openInputFile(tempdistFileName, intemp);
-
- for (int i = 0; i < calcDists.size(); i++) {
- int size = 0;
- intemp >> size; m->gobble(intemp);
-
- for (int j = 0; j < size; j++) {
- int seq1 = 0;
- int seq2 = 0;
- float dist = 1.0;
-
- intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
-
- seqDist tempDist(seq1, seq2, dist);
- calcDists[i].push_back(tempDist);
- }
- }
- intemp.close();
- remove(tempdistFileName.c_str());
- }
- }
+ vector< vector<seqDist> > calcDists; //vector containing vectors that contains the summary results for each group compare
+ calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
+
+
+ if(processors == 1){
+ driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+ m->appendFiles((sumFileName + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + ".temp"));
+ if (mult) {
+ m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+ m->mothurRemove((sumAllFileName + ".temp"));
+ }
+ }else{
+
+ int process = 1;
+ vector<int> processIDS;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid);
+ process++;
+ }else if (pid == 0){
+ driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
+
+ //only do this if you want a distance file
+ if (createPhylip) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+ ofstream outtemp;
+ m->openOutputFile(tempdistFileName, outtemp);
+
+ for (int i = 0; i < calcDists.size(); i++) {
+ outtemp << calcDists[i].size() << endl;
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+ }
+ }
+ outtemp.close();
+ }
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //parent do your part
+ driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
+ m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
+ if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+
+ //force parent to wait until all the processes are done
+ for (int i = 0; i < processIDS.size(); i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+ if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); }
+
+ if (createPhylip) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
+ ifstream intemp;
+ m->openInputFile(tempdistFileName, intemp);
+
+ for (int k = 0; k < calcDists.size(); k++) {
+ int size = 0;
+ intemp >> size; m->gobble(intemp);
+
+ for (int j = 0; j < size; j++) {
+ int seq1 = 0;
+ int seq2 = 0;
+ float dist = 1.0;
+
+ intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
+
+ seqDist tempDist(seq1, seq2, dist);
+ calcDists[k].push_back(tempDist);
+ }
+ }
+ intemp.close();
+ m->mothurRemove(tempdistFileName);
+ }
+ }
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //The Windows version uses threads that share memory, so be careful when passing variables through the summarySharedData struct.
+ //The fork() above clones the process, so memory is separate, but that is not the case on Windows.
+ //We take advantage of the shared memory to pass the results vectors back.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
- }
- #else
- driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
- m->appendFiles((sumFileName + ".temp"), sumFileName);
- remove((sumFileName + ".temp").c_str());
- if (mult) {
- m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
- remove((sumAllFileName + ".temp").c_str());
- }
- #endif
-
- if (createPhylip) {
- for (int i = 0; i < calcDists.size(); i++) {
- if (m->control_pressed) { break; }
+ vector<summarySharedData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisLookup[k]->getLabel());
+ temp->setGroup(thisLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+ for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+ }
+
+ // Allocate memory for thread data.
+ summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
+ pDataArray.push_back(tempSum);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ //parent do your part
+ driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
+ m->appendFiles((sumFileName + "0.temp"), sumFileName);
+ m->mothurRemove((sumFileName + "0.temp"));
+ if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+
+ for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
+
+ if (createPhylip) {
+ for (int k = 0; k < calcDists.size(); k++) {
+ int size = pDataArray[i]->calcDists[k].size();
+ for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
+ }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
+ }
+
+ if (createPhylip) {
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
- string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
- outputNames.push_back(distFileName);
- ofstream outDist;
- m->openOutputFile(distFileName, outDist);
- outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-
- //initialize matrix
- vector< vector<float> > matrix; //square matrix to represent the distance
- matrix.resize(thisLookup.size());
- for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
-
-
- for (int j = 0; j < calcDists[i].size(); j++) {
- int row = calcDists[i][j].seq1;
- int column = calcDists[i][j].seq2;
- float dist = calcDists[i][j].dist;
-
- matrix[row][column] = dist;
- matrix[column][row] = dist;
- }
-
- //output to file
- outDist << thisLookup.size() << endl;
- for (int r=0; r<thisLookup.size(); r++) {
- //output name
- string name = thisLookup[r]->getGroup();
- if (name.length() < 10) { //pad with spaces to make compatible
- while (name.length() < 10) { name += " "; }
- }
- outDist << name << '\t';
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+ outputNames.push_back(distFileName);
+ ofstream outDist;
+ m->openOutputFile(distFileName, outDist);
+ outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+
+ //initialize matrix
+ vector< vector<float> > matrix; //square matrix to represent the distance
+ matrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ int row = calcDists[i][j].seq1;
+ int column = calcDists[i][j].seq2;
+ float dist = calcDists[i][j].dist;
+
+ matrix[row][column] = dist;
+ matrix[column][row] = dist;
+ }
+
+ //output to file
+ outDist << thisLookup.size() << endl;
+ for (int r=0; r<thisLookup.size(); r++) {
+ //output name
+ string name = thisLookup[r]->getGroup();
+ if (name.length() < 10) { //pad with spaces to make compatible
+ while (name.length() < 10) { name += " "; }
+ }
+ outDist << name << '\t';
- //output distances
- for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; }
- outDist << endl;
- }
-
- outDist.close();
- }
- }
+ //output distances
+ for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; }
+ outDist << endl;
+ }
+
+ outDist.close();
+ }
+ }
return 0;
}
catch(exception& e) {
outputFileHandle << '\t';
sumCalculators[i]->print(outputFileHandle);
- seqDist temp(l, k, (1.0 - tempdata[0]));
+ seqDist temp(l, k, tempdata[0]);
calcDists[i].push_back(temp);
}
outputFileHandle << endl;