#include "unifracweightedcommand.h"
+//**********************************************************************************************************************
+vector<string> UnifracWeightedCommand::getValidParameters(){
+ try {
+ string Array[] = {"groups","iters","distance","random","processors","root","outputdir","inputdir"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "UnifracWeightedCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+UnifracWeightedCommand::UnifracWeightedCommand(){
+ try {
+ abort = true; calledHelp = true;
+ vector<string> tempOutNames;
+ outputTypes["weighted"] = tempOutNames;
+ outputTypes["wsummary"] = tempOutNames;
+ outputTypes["phylip"] = tempOutNames;
+ outputTypes["column"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> UnifracWeightedCommand::getRequiredParameters(){
+ try {
+ vector<string> myArray;
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "UnifracWeightedCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> UnifracWeightedCommand::getRequiredFiles(){
+ try {
+ string Array[] = {"tree","group"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "UnifracWeightedCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
/***********************************************************/
UnifracWeightedCommand::UnifracWeightedCommand(string option) {
try {
globaldata = GlobalData::getInstance();
- abort = false;
+ abort = false; calledHelp = false;
Groups.clear();
//allow user to run help
- if(option == "help") { help(); abort = true; }
+ if(option == "help") { help(); abort = true; calledHelp = true; }
else {
//valid paramters for this command
- string Array[] = {"groups","iters","distance","random","processors","outputdir","inputdir"};
+ string Array[] = {"groups","iters","distance","random","processors","root","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["weighted"] = tempOutNames;
+ outputTypes["wsummary"] = tempOutNames;
+ outputTypes["phylip"] = tempOutNames;
+ outputTypes["column"] = tempOutNames;
+
if (globaldata->gTree.size() == 0) {//no trees were read
m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.weighted command."); m->mothurOutEndLine(); abort = true; }
itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
convert(itersString, iters);
- string temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
- phylip = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
+ string temp = validParameter.validFile(parameters, "distance", false);
+ if (temp == "not found") { phylip = false; outputForm = ""; }
+ else{
+ if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
+ else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
+ }
+
+ temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
random = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
+ includeRoot = m->isTrue(temp);
+
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
convert(temp, processors);
tmap = globaldata->gTreemap;
sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".wsummary";
m->openOutputFile(sumFile, outSum);
- outputNames.push_back(sumFile);
+ outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
util = new SharedUtil();
string s; //to make work with setgroups
util->setGroups(globaldata->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
util->getCombos(groupComb, globaldata->Groups, numComp);
- weighted = new Weighted(tmap);
+ weighted = new Weighted(tmap, includeRoot);
}
}
void UnifracWeightedCommand::help(){
try {
m->mothurOut("The unifrac.weighted command can only be executed after a successful read.tree command.\n");
- m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance and random. No parameters are required.\n");
+ m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance, processors, root and random. No parameters are required.\n");
m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n");
+ m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n");
+ m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
m->mothurOut("The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n");
m->mothurOut("Example unifrac.weighted(groups=A-B-C, iters=500).\n");
m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
int UnifracWeightedCommand::execute() {
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
//get weighted for users tree
userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
-
- //create new tree with same num nodes and leaves as users
- randT = new Tree();
+ if (numComp < processors) { processors = numComp; }
+
//get weighted scores for users trees
for (int i = 0; i < T.size(); i++) {
- if (m->control_pressed) { delete randT; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
counter = 0;
rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
if (random) {
output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".weighted", itersString);
outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".weighted");
+ outputTypes["weighted"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".weighted");
}
userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
- if (m->control_pressed) {
- delete randT;
- if (random) { delete output; }
- outSum.close();
- for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
- return 0;
- }
-
+ if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
//save users score
for (int s=0; s<numComp; s++) {
}
if (random) {
- vector<double> sums = weighted->getBranchLengthSums(T[i]);
//calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
vector< vector<string> > namesOfGroupCombos;
for (int a=0; a<numGroups; a++) {
- for (int l = a+1; l < numGroups; l++) {
+ for (int l = 0; l < a; l++) {
vector<string> groups; groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
namesOfGroupCombos.push_back(groups);
}
}
-
+
+ lines.clear();
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors != 1){
int numPairs = namesOfGroupCombos.size();
if(i == processors - 1){
numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
}
- lines.push_back(new linePair(startPos, numPairsPerProcessor));
+ lines.push_back(linePair(startPos, numPairsPerProcessor));
}
}
#endif
//get scores for random trees
for (int j = 0; j < iters; j++) {
- int count = 0;
-
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- driver(T[i], randT, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums, rScores);
+ driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
}else{
- createProcesses(T[i], randT, namesOfGroupCombos, sums, rScores);
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ createProcesses(T[i], namesOfGroupCombos, rScores);
}
#else
- driver(T[i], randT, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums, rScores);
+ driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
#endif
if (m->control_pressed) { delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+
+ //report progress
+// m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
}
-
-
+ lines.clear();
+
//find the signifigance of the score for summary file
for (int f = 0; f < numComp; f++) {
//sort random scores
}
- if (m->control_pressed) { delete randT; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
printWSummaryFile();
//clear out users groups
globaldata->Groups.clear();
- delete randT;
if (m->control_pressed) {
for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
}
/**************************************************************************************************/
-int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector<string> > namesOfGroupCombos, vector<double>& sums, vector< vector<double> >& scores) {
+int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- int num = 0;
+ int process = 1;
vector<int> processIDS;
EstOutput results;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(t, randT, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums, scores);
-
- m->mothurOut("Merging results."); m->mothurOutEndLine();
-
+ driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
+
//pass numSeqs to parent
ofstream out;
- string tempFile = outputDir + toString(getpid()) + ".results.temp";
+ string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
m->openOutputFile(tempFile, out);
- out << results.size() << endl;
- for (int i = lines[process]->start; i < (lines[process]->start + lines[process]->num); i++) { out << results[i] << '\t'; } out << endl;
+ for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
out.close();
exit(0);
- }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
}
+ driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
+
//force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
+ for (int i=0;i<(processors-1);i++) {
int temp = processIDS[i];
wait(&temp);
}
-
+
//get data created by processes
- for (int i=0;i<processors;i++) {
+ for (int i=0;i<(processors-1);i++) {
+
ifstream in;
- string s = outputDir + toString(processIDS[i]) + ".results.temp";
+ string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
m->openInputFile(s, in);
- vector<double> r;
-
- //get quantiles
- while (!in.eof()) {
- int num;
- in >> num;
-
- m->gobble(in);
-
- double w;
- for (int j = 0; j < num; j++) {
- in >> w;
- r.push_back(w);
- }
- m->gobble(in);
- }
+ double tempScore;
+ for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
in.close();
remove(s.c_str());
-
- //save quan in quantiles
- for (int j = 0; j < r.size(); j++) {
- //put all values of r into results
- results.push_back(r[j]);
- }
}
- m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
-
return 0;
#endif
}
}
/**************************************************************************************************/
-int UnifracWeightedCommand::driver(Tree* t, Tree* randT, vector< vector<string> > namesOfGroupCombos, int start, int num, vector<double>& sums, vector< vector<double> >& scores) {
+int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
try {
- int count = 0;
- int total = start+num;
- int twentyPercent = (total * 0.20);
+ Tree* randT = new Tree();
for (int h = start; h < (start+num); h++) {
-
+
if (m->control_pressed) { return 0; }
//initialize weighted score
if (m->control_pressed) { delete randT; return 0; }
-
//get wscore of random tree
- EstOutput randomData = weighted->getValues(randT, groupA, groupB, sums);
-
+ EstOutput randomData = weighted->getValues(randT, groupA, groupB);
+
if (m->control_pressed) { delete randT; return 0; }
//save scores
scores[h].push_back(randomData[0]);
-
- count++;
-
- //report progress
- if((h) % twentyPercent == 0){ m->mothurOut("Random comparison percentage complete: " + toString(int((h / (float)total) * 100.0))); m->mothurOutEndLine(); }
}
-
- m->mothurOut("Random comparison percentage complete: 100"); m->mothurOutEndLine();
-
+
+ delete randT;
+
return 0;
}
if (WScoreSig[count] > (1/(float)iters)) {
outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
- m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count])); m->mothurOutEndLine();
+ m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
}else{
outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
- m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters)))); m->mothurOutEndLine();
+ m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
}
}else{
outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
- m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00"); m->mothurOutEndLine();
+ m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n");
}
count++;
}
//for each tree
for (int i = 0; i < T.size(); i++) {
- string phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".weighted.dist";
- outputNames.push_back(phylipFileName);
+ string phylipFileName;
+ if ((outputForm == "lt") || (outputForm == "square")) {
+ phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".weighted.phylip.dist";
+ outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
+ }else { //column
+ phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".weighted.column.dist";
+ outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
+ }
+
ofstream out;
m->openOutputFile(phylipFileName, out);
- //output numSeqs
- out << globaldata->Groups.size() << endl;
-
+ if ((outputForm == "lt") || (outputForm == "square")) {
+ //output numSeqs
+ out << globaldata->Groups.size() << endl;
+ }
+
//make matrix with scores in it
vector< vector<float> > dists; dists.resize(globaldata->Groups.size());
for (int i = 0; i < globaldata->Groups.size(); i++) {
//flip it so you can print it
for (int r=0; r<globaldata->Groups.size(); r++) {
- for (int l = r+1; l < globaldata->Groups.size(); l++) {
+ for (int l = 0; l < r; l++) {
dists[r][l] = utreeScores[count];
dists[l][r] = utreeScores[count];
count++;
if (name.length() < 10) { //pad with spaces to make compatible
while (name.length() < 10) { name += " "; }
}
- out << name << '\t';
- //output distances
- for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
- out << endl;
+ if (outputForm == "lt") {
+ out << name << '\t';
+
+ //output distances
+ for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
+ out << endl;
+ }else if (outputForm == "square") {
+ out << name << '\t';
+
+ //output distances
+ for (int l = 0; l < globaldata->Groups.size(); l++) { out << dists[r][l] << '\t'; }
+ out << endl;
+ }else{
+ //output distances
+ for (int l = 0; l < r; l++) {
+ string otherName = globaldata->Groups[l];
+ if (otherName.length() < 10) { //pad with spaces to make compatible
+ while (otherName.length() < 10) { otherName += " "; }
+ }
+
+ out << name << '\t' << otherName << dists[r][l] << endl;
+ }
+ }
}
out.close();
}