From a98eb683e17d8e49583bf2d215ab7562a4cdca75 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 1 Sep 2010 16:57:44 +0000 Subject: [PATCH] unifrac parallelization done, changed dist.seqs output=square to calc all distance twice instead of converting the matrix. --- distancecommand.cpp | 238 +++++++++++++++++++++++--------- distancecommand.h | 8 +- unifracunweightedcommand.cpp | 31 ++--- unifracweightedcommand.cpp | 69 +++++----- unifracweightedcommand.h | 7 +- unweighted.cpp | 259 ++++++++++++++++++++++------------- unweighted.h | 7 +- weighted.cpp | 9 +- weighted.h | 2 +- 9 files changed, 401 insertions(+), 229 deletions(-) diff --git a/distancecommand.cpp b/distancecommand.cpp index 351a3f4..b3f0a2d 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -237,10 +237,15 @@ int DistanceCommand::execute(){ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are //each process gets where it should start and stop in the file - start = int (sqrt(float(pid)/float(processors)) * numSeqs); - end = int (sqrt(float(pid+1)/float(processors)) * numSeqs); + if (output != "square") { + start = int (sqrt(float(pid)/float(processors)) * numSeqs); + end = int (sqrt(float(pid+1)/float(processors)) * numSeqs); + }else{ + start = int ((float(pid)/float(processors)) * numSeqs); + end = int ((float(pid+1)/float(processors)) * numSeqs); + } - if (output != "lt") { + if (output == "column") { MPI_File outMPI; int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; @@ -257,7 +262,8 @@ int DistanceCommand::execute(){ //do your part string outputMyPart; - driverMPI(start, end, outMPI, cutoff); + + driverMPI(start, end, outMPI, cutoff); if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } @@ -270,7 +276,7 @@ int DistanceCommand::execute(){ } }else { //you are a child process //do your part - driverMPI(start, end, outMPI, cutoff); + driverMPI(start, end, outMPI, cutoff); if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } @@ -287,8 +293,10 @@ int DistanceCommand::execute(){ //do your part string outputMyPart; - long mySize; - driverMPI(start, end, outputFile, mySize); + unsigned long int mySize; + + if (output != "square"){ driverMPI(start, end, outputFile, mySize); } + else { driverMPI(start, end, outputFile, mySize, output); } if (m->control_pressed) { delete distCalculator; return 0; } @@ -307,7 +315,7 @@ int DistanceCommand::execute(){ //wait on chidren for(int b = 1; b < processors; b++) { - long fileSize; + unsigned long int fileSize; if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } @@ -335,8 +343,9 @@ int DistanceCommand::execute(){ MPI_File_close(&outMPI); }else { //you are a child process //do your part - long size; - driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); + unsigned long int size; + if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); } + else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); } if (m->control_pressed) { delete distCalculator; return 0; } @@ -350,13 +359,19 @@ int DistanceCommand::execute(){ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //if you don't need to fork anything if(processors == 1){ - driver(0, numSeqs, outputFile, cutoff); + if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } + else { driver(0, numSeqs, outputFile, "square"); } }else{ //you have multiple processors for (int i = 0; i < processors; i++) { lines.push_back(new linePair()); - lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs); - lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs); + if (output != "square") { + lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs); + lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs); + }else{ + lines[i]->start = int ((float(i)/float(processors)) * numSeqs); + lines[i]->end = int ((float(i+1)/float(processors)) * numSeqs); + } } createProcesses(outputFile); @@ -372,8 +387,9 @@ int DistanceCommand::execute(){ } } #else - ifstream inFASTA; - driver(0, numSeqs, outputFile, cutoff); + //ifstream inFASTA; + if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } + else { driver(0, numSeqs, outputFile, "square"); } #endif #endif @@ -385,7 +401,7 @@ int DistanceCommand::execute(){ if (pid == 0) { //only one process should output to screen #endif - if (output == "square") { convertMatrix(outputFile); } + //if (output == "square") { convertMatrix(outputFile); } ifstream fileHandle; fileHandle.open(outputFile.c_str()); @@ -445,7 +461,8 @@ void DistanceCommand::createProcesses(string filename) { processIDS[lines[process]->end] = pid; //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff); + if (output != "square") { driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff); } + else { driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", "square"); } exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } @@ -500,12 +517,6 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; } } if (output == "lt") { outFile << dist << '\t'; } - - if (output == "square") { //make a square column you can convert to square phylip - outFile << alignDB.get(i).getName() << '\t' << alignDB.get(j).getName() << '\t' << dist << endl; - outFile << alignDB.get(j).getName() << '\t' << alignDB.get(i).getName() << '\t' << dist << endl; - } - } if (output == "lt") { outFile << endl; } @@ -526,6 +537,56 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float exit(1); } } +/**************************************************************************************************/ +/////// need to fix to work with calcs and sequencedb +int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){ + try { + + int startTime = time(NULL); + + //column file + ofstream outFile(dFileName.c_str(), ios::trunc); + outFile.setf(ios::fixed, ios::showpoint); + outFile << setprecision(4); + + if(startLine == 0){ outFile << alignDB.getNumSeqs() << endl; } + + for(int i=startLine;icontrol_pressed) { outFile.close(); return 0; } + + distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); + double dist = distCalculator->getDist(); + + outFile << dist << '\t'; + } + + outFile << endl; + + if(i % 100 == 0){ + m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + } + + } + m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + + outFile.close(); + + return 1; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "driver"); + exit(1); + } +} #ifdef USE_MPI /**************************************************************************************************/ /////// need to fix to work with calcs and sequencedb @@ -550,12 +611,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo double dist = distCalculator->getDist(); if(dist <= cutoff){ - if (output == "column") { outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); } - } - - if (output == "square") { //make a square column you can convert to square phylip - outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); - outputString += (alignDB.get(j).getName() + ' ' + alignDB.get(i).getName() + ' ' + toString(dist) + '\n'); + outputString += (alignDB.get(i).getName() + ' ' + alignDB.get(j).getName() + ' ' + toString(dist) + '\n'); } } @@ -588,7 +644,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } /**************************************************************************************************/ /////// need to fix to work with calcs and sequencedb -int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& size){ +int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){ try { MPI_Status status; @@ -609,16 +665,16 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& si string outputString = ""; size = 0; - if((output == "lt") && startLine == 0){ outputString += toString(alignDB.getNumSeqs()) + "\n"; } + if(startLine == 0){ outputString += toString(alignDB.getNumSeqs()) + "\n"; } for(int i=startLine;icontrol_pressed) { return 0; } @@ -626,10 +682,10 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& si distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); - if (output == "lt") { outputString += toString(dist) + "\t"; } + outputString += toString(dist) + "\t"; } - if (output == "lt") { outputString += "\n"; } + outputString += "\n"; if(i % 100 == 0){ @@ -660,8 +716,82 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, long& si exit(1); } } -#endif /**************************************************************************************************/ +/////// need to fix to work with calcs and sequencedb +int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){ + try { + MPI_Status status; + + MPI_File outMPI; + int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + + //char* filename = new char[file.length()]; + //memcpy(filename, file.c_str(), file.length()); + + char filename[1024]; + strcpy(filename, file.c_str()); + + MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); + //delete filename; + + int startTime = time(NULL); + + string outputString = ""; + size = 0; + + if(startLine == 0){ outputString += toString(alignDB.getNumSeqs()) + "\n"; } + + for(int i=startLine;icontrol_pressed) { return 0; } + + distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); + double dist = distCalculator->getDist(); + + outputString += toString(dist) + "\t"; + } + + outputString += "\n"; + + + if(i % 100 == 0){ + //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + cout << i << '\t' << (time(NULL) - startTime) << endl; + } + + + //send results to parent + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write(outMPI, buf, length, MPI_CHAR, &status); + size += outputString.length(); + outputString = ""; + delete buf; + } + + //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + MPI_File_close(&outMPI); + + return 1; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "driverMPI"); + exit(1); + } +} +#endif +/************************************************************************************************** int DistanceCommand::convertMatrix(string outputFile) { try{ @@ -749,7 +879,7 @@ int DistanceCommand::convertMatrix(string outputFile) { exit(1); } } -/**************************************************************************************************/ +/************************************************************************************************** int DistanceCommand::convertToLowerTriangle(string outputFile) { try{ @@ -933,30 +1063,8 @@ bool DistanceCommand::sanityCheck() { exit(1); } } - -/************************************************************************************************** -void DistanceCommand::m->appendFiles(string temp, string filename) { - try{ - ofstream output; - ifstream input; - - //open output file in append mode - m->openOutputFileAppend(filename, output); - m->openInputFile(temp, input); - - while(char c = input.get()){ - if(input.eof()) { break; } - else { output << c; } - } - - input.close(); - output.close(); - } - catch(exception& e) { - m->errorOut(e, "DistanceCommand", "m->appendFiles"); - exit(1); - } -} /**************************************************************************************************/ + + diff --git a/distancecommand.h b/distancecommand.h index 4a25761..64eac8d 100644 --- a/distancecommand.h +++ b/distancecommand.h @@ -46,15 +46,17 @@ private: //void m->appendFiles(string, string); void createProcesses(string); int driver(/*Dist*, SequenceDB, */int, int, string, float); + int driver(int, int, string, string); #ifdef USE_MPI int driverMPI(int, int, MPI_File&, float); - int driverMPI(int, int, string, long&); + int driverMPI(int, int, string, unsigned long int&); + int driverMPI(int, int, string, unsigned long int&, string); #endif - int convertMatrix(string); + //int convertMatrix(string); bool sanityCheck(); - int convertToLowerTriangle(string); + //int convertToLowerTriangle(string); }; diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 7836eb6..a50d147 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -160,7 +160,7 @@ int UnifracUnweightedCommand::execute() { UWScoreSig.resize(numComp); userData = unweighted->getValues(T[i], processors, outputDir); //userData[0] = unweightedscore - + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }return 0; } //output scores for each combination @@ -171,18 +171,14 @@ int UnifracUnweightedCommand::execute() { //add users score to validscores validScores[userData[k]] = userData[k]; } - + //get unweighted scores for random trees - if random is false iters = 0 for (int j = 0; j < iters; j++) { + //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison - randomData = unweighted->getValues(T[i], "", ""); + randomData = unweighted->getValues(T[i], "", "", processors, outputDir); - if (m->control_pressed) { - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } for(int k = 0; k < numComp; k++) { //add trees unweighted score to map of scores @@ -196,6 +192,9 @@ int UnifracUnweightedCommand::execute() { //add randoms score to validscores validScores[randomData[k]] = randomData[k]; } + + //report progress + m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine(); } for(int a = 0; a < numComp; a++) { @@ -216,13 +215,7 @@ int UnifracUnweightedCommand::execute() { } - - if (m->control_pressed) { - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //print output files printUWSummaryFile(i); @@ -299,16 +292,16 @@ void UnifracUnweightedCommand::printUWSummaryFile(int i) { if (UWScoreSig[a][0] > (1/(float)iters)) { outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; - m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])); m->mothurOutEndLine(); + m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n"); }else { outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; - m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters)))); m->mothurOutEndLine(); + m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n"); } }else{ outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl; - m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t0.00"); m->mothurOutEndLine(); + m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t0.00\n"); } } diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 5a2a5ec..1417b5a 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -125,13 +125,10 @@ int UnifracWeightedCommand::execute() { userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC... randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC... - //create new tree with same num nodes and leaves as users - randT = new Tree(); - //get weighted scores for users trees for (int i = 0; i < T.size(); i++) { - if (m->control_pressed) { delete randT; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } counter = 0; rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... @@ -144,14 +141,7 @@ int UnifracWeightedCommand::execute() { userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore - if (m->control_pressed) { - delete randT; - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } - + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //save users score for (int s=0; scontrol_pressed) { delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } @@ -208,9 +200,8 @@ int UnifracWeightedCommand::execute() { //report progress m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine(); } - - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); - + lines.clear(); + //find the signifigance of the score for summary file for (int f = 0; f < numComp; f++) { //sort random scores @@ -241,7 +232,7 @@ int UnifracWeightedCommand::execute() { } - if (m->control_pressed) { delete randT; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } printWSummaryFile(); @@ -250,7 +241,6 @@ int UnifracWeightedCommand::execute() { //clear out users groups globaldata->Groups.clear(); - delete randT; if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } @@ -274,7 +264,7 @@ int UnifracWeightedCommand::execute() { } /**************************************************************************************************/ -int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector > namesOfGroupCombos, vector& sums, vector< vector >& scores) { +int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, vector& sums, vector< vector >& scores) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 1; @@ -291,35 +281,36 @@ int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - driver(t, randT, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums, scores); + driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, sums, scores); //pass numSeqs to parent ofstream out; string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp"; m->openOutputFile(tempFile, out); - for (int i = lines[process]->start; i < (lines[process]->start + lines[process]->num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl; + for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl; out.close(); exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } - driver(t, randT, namesOfGroupCombos, lines[0]->start, lines[0]->num, sums, scores); + driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, sums, scores); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { int temp = processIDS[i]; wait(&temp); } - + //get data created by processes for (int i=0;i<(processors-1);i++) { + ifstream in; string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp"; m->openInputFile(s, in); double tempScore; - for (int i = lines[process]->start; i < (lines[process]->start + lines[process]->num); i++) { in >> tempScore; scores[i].push_back(tempScore); } + for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); } in.close(); remove(s.c_str()); } @@ -334,11 +325,12 @@ int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector } /**************************************************************************************************/ -int UnifracWeightedCommand::driver(Tree* t, Tree* randT, vector< vector > namesOfGroupCombos, int start, int num, vector& sums, vector< vector >& scores) { +int UnifracWeightedCommand::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, vector& sums, vector< vector >& scores) { try { - + Tree* randT = new Tree(); + for (int h = start; h < (start+num); h++) { - cout << "doing " << h << endl; + if (m->control_pressed) { return 0; } //initialize weighted score @@ -353,16 +345,17 @@ int UnifracWeightedCommand::driver(Tree* t, Tree* randT, vector< vector if (m->control_pressed) { delete randT; return 0; } - //get wscore of random tree EstOutput randomData = weighted->getValues(randT, groupA, groupB, sums); - + if (m->control_pressed) { delete randT; return 0; } //save scores scores[h].push_back(randomData[0]); } - + + delete randT; + return 0; } @@ -414,16 +407,16 @@ void UnifracWeightedCommand::printWSummaryFile() { if (WScoreSig[count] > (1/(float)iters)) { outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; - m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count])); m->mothurOutEndLine(); + m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n"); }else{ outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; - m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters)))); m->mothurOutEndLine(); + m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n"); } }else{ outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; - m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00"); m->mothurOutEndLine(); + m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n"); } count++; } diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index b6aea7d..7317c8a 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -34,7 +34,7 @@ class UnifracWeightedCommand : public Command { int num; linePair(int i, int j) : start(i), num(j) {} }; - vector lines; + vector lines; GlobalData* globaldata; SharedUtil* util; @@ -43,7 +43,6 @@ class UnifracWeightedCommand : public Command { vector utreeScores; //user tree unweighted scores vector WScoreSig; //tree weighted score signifigance when compared to random trees - percentage of random trees with that score or lower. vector groupComb; // AB. AC, BC... - Tree* randT; //random tree TreeMap* tmap; Weighted* weighted; string sumFile, outputDir; @@ -70,8 +69,8 @@ class UnifracWeightedCommand : public Command { //void removeValidScoresDuplicates(); int findIndex(float, int); void calculateFreqsCumuls(); - int createProcesses(Tree*, Tree*, vector< vector >, vector&, vector< vector >&); - int driver(Tree*, Tree*, vector< vector >, int, int, vector&, vector< vector >&); + int createProcesses(Tree*, vector< vector >, vector&, vector< vector >&); + int driver(Tree*, vector< vector >, int, int, vector&, vector< vector >&); }; diff --git a/unweighted.cpp b/unweighted.cpp index ba88049..a9515d1 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -59,15 +59,16 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { for (int i = 0; i < processors; i++) { int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(new linePair(startPos, numPairsPerProcessor)); + + lines.push_back(linePair(startPos, numPairsPerProcessor)); } data = createProcesses(t, namesOfGroupCombos); - - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + lines.clear(); } #else data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); @@ -100,7 +101,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG process++; }else if (pid == 0){ EstOutput myresults; - myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num); + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); if (m->control_pressed) { exit(0); } @@ -118,7 +119,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } - results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { @@ -171,7 +172,8 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo int count = 0; int total = num; int twentyPercent = (total * 0.20); - + if (twentyPercent == 0) { twentyPercent = 1; } + for (int h = start; h < (start+num); h++) { if (m->control_pressed) { return results; } @@ -201,7 +203,7 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo totalBL += abs(t->tree[i].getBranchLength()); } } - + UW = (UniqueBL / totalBL); if (isnan(UW) || isinf(UW)) { UW = 0; } @@ -210,10 +212,11 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo count++; //report progress - if((count) % twentyPercent == 0){ m->mothurOut("Percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine(); } + if((count % twentyPercent) == 0) { float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine(); } } - m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine(); + //report progress + if((count % twentyPercent) != 0) { float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine(); } return results; } @@ -224,83 +227,29 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo } /**************************************************************************************************/ -EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { +EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { try { - globaldata = GlobalData::getInstance(); + globaldata = GlobalData::getInstance(); + processors = p; + outputDir = o; + - vector groups; - double UniqueBL; //a branch length is unique if it's chidren are from the same group - double totalBL; //all branch lengths - double UW; //Unweighted Value = UniqueBL / totalBL; - copyTree = new Tree; - //if the users enters no groups then give them the score of all groups int numGroups = globaldata->Groups.size(); - + //calculate number of comparsions int numComp = 0; + vector< vector > namesOfGroupCombos; for (int r=0; rgetCopy(t); - - //groups in this combo - groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]); - - //swap labels in the groups you want to compare - copyTree->assembleRandomUnifracTree(groups[0], groups[1]); - - if (m->control_pressed) { delete copyTree; return data; } - - for(int i=0;igetNumNodes();i++){ - - if (m->control_pressed) { return data; } - - //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want - //pcountSize = 2, not unique to one group - //pcountSize = 1, unique to one group - - int pcountSize = 0; - for (int j = 0; j < groups.size(); j++) { - map::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]); - if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; } - } - - if (pcountSize == 0) { } - else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(copyTree->tree[i].getBranchLength()); } - - if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { - totalBL += abs(copyTree->tree[i].getBranchLength()); - } - } - - - UW = (UniqueBL / totalBL); - - if (isnan(UW) || isinf(UW)) { UW = 0; } - - data[count] = UW; - count++; - groups.clear(); + vector groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]); + namesOfGroupCombos.push_back(groups); } } - if (numComp != 1) { + vector groups; if (numGroups == 0) { //get score for all users groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { @@ -308,25 +257,151 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { groups.push_back(tmap->namesOfGroups[i]); } } + namesOfGroupCombos.push_back(groups); }else { for (int i = 0; i < globaldata->Groups.size(); i++) { groups.push_back(globaldata->Groups[i]); } + namesOfGroupCombos.push_back(groups); } + } + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true); + }else{ + int numPairs = namesOfGroupCombos.size(); + + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ + numPairsPerProcessor = numPairs - i * numPairsPerProcessor; + } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + + data = createProcesses(t, namesOfGroupCombos, true); + + lines.clear(); + } + #else + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true); + #endif + + return data; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "getValues"); + exit(1); + } +} +/**************************************************************************************************/ + +EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, bool usingGroups) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 1; + int num = 0; + vector processIDS; - UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group - totalBL = 0.00; //all branch lengths - UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + EstOutput results; + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + EstOutput myresults; + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups); + + if (m->control_pressed) { exit(0); } + + //pass numSeqs to parent + ofstream out; + string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp"; + m->openOutputFile(tempFile, out); + out << myresults.size() << endl; + for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; + out.close(); + + exit(0); + }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + } + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups); + + //force parent to wait until all the processes are done + for (int i=0;i<(processors-1);i++) { + int temp = processIDS[i]; + wait(&temp); + } + + if (m->control_pressed) { return results; } + + //get data created by processes + for (int i=0;i<(processors-1);i++) { + ifstream in; + string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp"; + m->openInputFile(s, in); + + //get quantiles + if (!in.eof()) { + int num; + in >> num; m->gobble(in); + + if (m->control_pressed) { break; } + + double w; + for (int j = 0; j < num; j++) { + in >> w; + results.push_back(w); + } + m->gobble(in); + } + in.close(); + remove(s.c_str()); + } + + return results; +#endif + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "createProcesses"); + exit(1); + } +} +/**************************************************************************************************/ +EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, bool usingGroups) { + try { + + EstOutput results; results.resize(num); + + int count = 0; + int total = num; + int twentyPercent = (total * 0.20); + + Tree* copyTree = new Tree; + + for (int h = start; h < (start+num); h++) { + + if (m->control_pressed) { return results; } //copy random tree passed in copyTree->getCopy(t); - //swap labels in all the groups you want to compare - copyTree->assembleRandomUnifracTree(groups); + //swap labels in the groups you want to compare + copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]); - if (m->control_pressed) { delete copyTree; return data; } - - for(int i=0;igetNumNodes();i++){ + double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group + double totalBL = 0.00; //all branch lengths + double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + + for(int i=0;igetNumNodes();i++){ if (m->control_pressed) { return data; } @@ -335,16 +410,16 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { //pcountSize = 1, unique to one group int pcountSize = 0; - for (int j = 0; j < groups.size(); j++) { - map::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]); - if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { + map::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]); + if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } if (pcountSize == 0) { } - else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(copyTree->tree[i].getBranchLength()); } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } - if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { - totalBL += abs(copyTree->tree[i].getBranchLength()); + if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { + totalBL += abs(t->tree[i].getBranchLength()); } } @@ -352,20 +427,20 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { if (isnan(UW) || isinf(UW)) { UW = 0; } - data[count] = UW; + results[count] = UW; + count++; + } delete copyTree; - return data; - + return results; } catch(exception& e) { - m->errorOut(e, "Unweighted", "getValues"); + m->errorOut(e, "Unweighted", "driver"); exit(1); } } - /**************************************************************************************************/ diff --git a/unweighted.h b/unweighted.h index f27df8c..f413261 100644 --- a/unweighted.h +++ b/unweighted.h @@ -22,7 +22,7 @@ class Unweighted : public TreeCalculator { Unweighted(TreeMap* t) : tmap(t) {}; ~Unweighted() {}; EstOutput getValues(Tree*, int, string); - EstOutput getValues(Tree*, string, string); + EstOutput getValues(Tree*, string, string, int, string); private: struct linePair { @@ -30,10 +30,9 @@ class Unweighted : public TreeCalculator { int num; linePair(int i, int j) : start(i), num(j) {} }; - vector lines; + vector lines; GlobalData* globaldata; - Tree* copyTree; EstOutput data; TreeMap* tmap; int processors; @@ -41,6 +40,8 @@ class Unweighted : public TreeCalculator { EstOutput driver(Tree*, vector< vector >, int, int); EstOutput createProcesses(Tree*, vector< vector >); + EstOutput driver(Tree*, vector< vector >, int, int, bool); + EstOutput createProcesses(Tree*, vector< vector >, bool); }; diff --git a/weighted.cpp b/weighted.cpp index 5f58f2e..fd7f854 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -51,12 +51,12 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(new linePair(startPos, numPairsPerProcessor)); + lines.push_back(linePair(startPos, numPairsPerProcessor)); } data = createProcesses(t, namesOfGroupCombos, sums); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + lines.clear(); } #else data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums); @@ -90,7 +90,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro }else if (pid == 0){ EstOutput Myresults; - Myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums); + Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, sums); m->mothurOut("Merging results."); m->mothurOutEndLine(); @@ -109,7 +109,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } - results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num, sums); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, sums); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { @@ -196,6 +196,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, //calculate u for the group comb int total = t->getNumNodes(); int twentyPercent = (total * 0.20); + for(int i=0;igetNumNodes();i++){ for (int h = start; h < (start+num); h++) { diff --git a/weighted.h b/weighted.h index 8a8ad8f..3fa5123 100644 --- a/weighted.h +++ b/weighted.h @@ -33,7 +33,7 @@ class Weighted : public TreeCalculator { int num; linePair(int i, int j) : start(i), num(j) {} }; - vector lines; + vector lines; GlobalData* globaldata; EstOutput data; -- 2.39.2