else {
//valid paramters for this command
- string Array[] = {"label","calc","groups","all","outputdir","inputdir"};
+ string Array[] = {"label","calc","groups","all","outputdir","distance","inputdir", "processors"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
all = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
+ createPhylip = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "processors", false); if(temp == "not found"){ temp = "1"; }
+ convert(temp, processors);
+
if (abort == false) {
validCalculator = new ValidCalculators();
}
}
- outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
- m->openOutputFile(outputFileName, outputFileHandle);
- outputNames.push_back(outputFileName);
-
mult = false;
}
}
if (abort == true) { return 0; }
+ ofstream outputFileHandle, outAll;
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
+
//if the users entered no valid calculators don't execute command
if (sumCalculators.size() == 0) { return 0; }
//check if any calcs can do multiples
input = globaldata->ginput;
lookup = input->getSharedRAbundVectors();
string lastLabel = lookup[0]->getLabel();
-
+
+ /******************************************************/
+ //output headings for files
+ /******************************************************/
//output estimator names as column headers
+ m->openOutputFile(outputFileName, outputFileHandle);
outputFileHandle << "label" <<'\t' << "comparison" << '\t';
for(int i=0;i<sumCalculators.size();i++){
outputFileHandle << '\t' << sumCalculators[i]->getName();
if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
}
outputFileHandle << endl;
+ outputFileHandle.close();
//create file and put column headers for multiple groups file
+ string outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
if (mult == true) {
- outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
m->openOutputFile(outAllFileName, outAll);
outputNames.push_back(outAllFileName);
}
}
outAll << endl;
+ outAll.close();
}
if (lookup.size() < 2) {
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
//close files and clean up
- outputFileHandle.close(); remove(outputFileName.c_str());
- if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); }
+ remove(outputFileName.c_str());
+ if (mult == true) { remove(outAllFileName.c_str()); }
return 0;
//if you only have 2 groups you don't need a .sharedmultiple file
}else if ((lookup.size() == 2) && (mult == true)) {
mult = false;
- outAll.close();
remove(outAllFileName.c_str());
outputNames.pop_back();
}
if (m->control_pressed) {
- if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
- outputFileHandle.close(); remove(outputFileName.c_str());
+ if (mult) { remove(outAllFileName.c_str()); }
+ remove(outputFileName.c_str());
delete input; globaldata->ginput = NULL;
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
globaldata->Groups.clear();
return 0;
}
-
-
+ /******************************************************/
+
+
+ /******************************************************/
+ //comparison breakup to be used by different processes later
+ numGroups = globaldata->Groups.size();
+ lines.resize(processors);
+ for (int i = 0; i < processors; i++) {
+ lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
+ lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
+ }
+ /******************************************************/
+
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
set<string> userLabels = labels;
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
- if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
- outputFileHandle.close(); remove(outputFileName.c_str());
+ if (mult) { remove(outAllFileName.c_str()); }
+ remove(outputFileName.c_str());
delete input; globaldata->ginput = NULL;
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- process(lookup);
+ process(lookup, outputFileName, outAllFileName);
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
lookup = input->getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- process(lookup);
+ process(lookup, outputFileName, outAllFileName);
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
}
if (m->control_pressed) {
- if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
- outputFileHandle.close(); remove(outputFileName.c_str());
+ if (mult) { remove(outAllFileName.c_str()); }
+ remove(outputFileName.c_str());
delete input; globaldata->ginput = NULL;
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
globaldata->Groups.clear();
lookup = input->getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- process(lookup);
+ process(lookup, outputFileName, outAllFileName);
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
}
//reset groups parameter
globaldata->Groups.clear();
- //close files
- outputFileHandle.close();
- if (mult == true) { outAll.close(); }
-
for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
delete input; globaldata->ginput = NULL;
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ m->mothurOut(outputFileName); m->mothurOutEndLine();
+ if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
}
/***********************************************************/
-int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
+int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
try {
- //loop through calculators and add to file all for all calcs that can do mutiple groups
- if (mult == true) {
- //output label
- outAll << thisLookup[0]->getLabel() << '\t';
-
- //output groups names
- string outNames = "";
- for (int j = 0; j < thisLookup.size(); j++) {
- outNames += thisLookup[j]->getGroup() + "-";
+ vector< vector<seqDist> > calcDists; //vector containing vectors that contains the summary results for each group compare
+ calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
+
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+ m->appendFiles((sumFileName + ".temp"), sumFileName);
+ remove((sumFileName + ".temp").c_str());
+ if (mult) {
+ m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+ remove((sumAllFileName + ".temp").c_str());
}
- outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
- outAll << outNames << '\t';
-
- for(int i=0;i<sumCalculators.size();i++){
- if (sumCalculators[i]->getMultiple() == true) {
- sumCalculators[i]->getValues(thisLookup);
+ }else{
+ int process = 1;
+ vector<int> processIDS;
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid);
+ process++;
+ }else if (pid == 0){
+ driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
- if (m->control_pressed) { return 1; }
+ //only do this if you want a distance file
+ if (createPhylip) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+ ofstream outtemp;
+ m->openOutputFile(tempdistFileName, outtemp);
+
+ for (int i = 0; i < calcDists.size(); i++) {
+ outtemp << calcDists[i].size() << endl;
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+ }
+ }
+ outtemp.close();
+ }
- outAll << '\t';
- sumCalculators[i]->print(outAll);
- }
+ exit(0);
+ }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
- outAll << endl;
- }
-
- int n = 1;
- vector<SharedRAbundVector*> subset;
- for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
- for (int l = n; l < thisLookup.size(); l++) {
-
- outputFileHandle << thisLookup[0]->getLabel() << '\t';
+
+ //parent do your part
+ driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
+ m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+ remove((sumFileName + toString(getpid()) + ".temp").c_str());
+ if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
- subset.clear(); //clear out old pair of sharedrabunds
- //add new pair of sharedrabunds
- subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+ //force parent to wait until all the processes are done
+ for (int i = 0; i < processIDS.size(); i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+ remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
+ if (mult) { remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str()); }
- //sort groups to be alphanumeric
- if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
- outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
- }else{
- outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
+ if (createPhylip) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist";
+ ifstream intemp;
+ m->openInputFile(tempdistFileName, intemp);
+
+ for (int i = 0; i < calcDists.size(); i++) {
+ int size = 0;
+ intemp >> size; m->gobble(intemp);
+
+ for (int j = 0; j < size; j++) {
+ int seq1 = 0;
+ int seq2 = 0;
+ float dist = 1.0;
+
+ intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
+
+ seqDist tempDist(seq1, seq2, dist);
+ calcDists[i].push_back(tempDist);
+ }
+ }
+ intemp.close();
+ remove(tempdistFileName.c_str());
}
-
- for(int i=0;i<sumCalculators.size();i++) {
+ }
- sumCalculators[i]->getValues(subset); //saves the calculator outputs
-
- if (m->control_pressed) { return 1; }
-
- outputFileHandle << '\t';
- sumCalculators[i]->print(outputFileHandle);
+ }
+ #else
+ driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
+ m->appendFiles((sumFileName + ".temp"), sumFileName);
+ remove((sumFileName + ".temp").c_str());
+ if (mult) {
+ m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+ remove((sumAllFileName + ".temp").c_str());
+ }
+ #endif
+
+ if (createPhylip) {
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+ outputNames.push_back(distFileName);
+ ofstream outDist;
+ m->openOutputFile(distFileName, outDist);
+ outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+
+ //initialize matrix
+ vector< vector<float> > matrix; //square matrix to represent the distance
+ matrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ int row = calcDists[i][j].seq1;
+ int column = calcDists[i][j].seq2;
+ float dist = calcDists[i][j].dist;
+
+ matrix[row][column] = dist;
+ matrix[column][row] = dist;
+ }
+
+ //output to file
+ outDist << thisLookup.size() << endl;
+ for (int r=0; r<thisLookup.size(); r++) {
+ //output name
+ string name = thisLookup[r]->getGroup();
+ if (name.length() < 10) { //pad with spaces to make compatible
+ while (name.length() < 10) { name += " "; }
}
- outputFileHandle << endl;
+ outDist << name << '\t';
+
+ //output distances
+ for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; }
+ outDist << endl;
}
- n++;
+
+ outDist.close();
}
- return 0;
+ }
}
catch(exception& e) {
m->errorOut(e, "SummarySharedCommand", "process");
exit(1);
}
}
+/**************************************************************************************************/
+int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) {
+ try {
+
+ //loop through calculators and add to file all for all calcs that can do mutiple groups
+ if (mult == true) {
+ ofstream outAll;
+ m->openOutputFile(sumAllFile, outAll);
+
+ //output label
+ outAll << thisLookup[0]->getLabel() << '\t';
+
+ //output groups names
+ string outNames = "";
+ for (int j = 0; j < thisLookup.size(); j++) {
+ outNames += thisLookup[j]->getGroup() + "-";
+ }
+ outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
+ outAll << outNames << '\t';
+
+ for(int i=0;i<sumCalculators.size();i++){
+ if (sumCalculators[i]->getMultiple() == true) {
+ sumCalculators[i]->getValues(thisLookup);
+
+ if (m->control_pressed) { outAll.close(); return 1; }
+
+ outAll << '\t';
+ sumCalculators[i]->print(outAll);
+ }
+ }
+ outAll << endl;
+ outAll.close();
+ }
+
+ ofstream outputFileHandle;
+ m->openOutputFile(sumFile, outputFileHandle);
+
+ vector<SharedRAbundVector*> subset;
+ for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
+
+ for (int l = 0; l < k; l++) {
+
+ outputFileHandle << thisLookup[0]->getLabel() << '\t';
+
+ subset.clear(); //clear out old pair of sharedrabunds
+ //add new pair of sharedrabunds
+ subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+
+ //sort groups to be alphanumeric
+ if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
+ outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
+ }else{
+ outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
+ }
+
+ for(int i=0;i<sumCalculators.size();i++) {
+
+ vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
+
+ if (m->control_pressed) { outputFileHandle.close(); return 1; }
+
+ outputFileHandle << '\t';
+ sumCalculators[i]->print(outputFileHandle);
+
+ seqDist temp(l, k, tempdata[0]);
+ calcDists[i].push_back(temp);
+ }
+ outputFileHandle << endl;
+ }
+ }
+
+ outputFileHandle.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummarySharedCommand", "driver");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
-/***********************************************************/