X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=summarysharedcommand.cpp;h=8c4ea0d6cd5b4cf26e15ec3460da8e5fd74e5fb6;hb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;hp=6db7dab85b9b51ae798aaed4df4bc581adc5db6d;hpb=ca9ac1d80c62f57270b0dcd49410ebe08a8aecd6;p=mothur.git diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 6db7dab..8c4ea0d 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -8,47 +8,6 @@ */ #include "summarysharedcommand.h" -#include "sharedsobscollectsummary.h" -#include "sharedchao1.h" -#include "sharedace.h" -#include "sharednseqs.h" -#include "sharedjabund.h" -#include "sharedsorabund.h" -#include "sharedjclass.h" -#include "sharedsorclass.h" -#include "sharedjest.h" -#include "sharedsorest.h" -#include "sharedthetayc.h" -#include "sharedthetan.h" -#include "sharedkstest.h" -#include "whittaker.h" -#include "sharedochiai.h" -#include "sharedanderbergs.h" -#include "sharedkulczynski.h" -#include "sharedkulczynskicody.h" -#include "sharedlennon.h" -#include "sharedmorisitahorn.h" -#include "sharedbraycurtis.h" -#include "sharedjackknife.h" -#include "whittaker.h" -#include "odum.h" -#include "canberra.h" -#include "structeuclidean.h" -#include "structchord.h" -#include "hellinger.h" -#include "manhattan.h" -#include "structpearson.h" -#include "soergel.h" -#include "spearman.h" -#include "structkulczynski.h" -#include "structchi2.h" -#include "speciesprofile.h" -#include "hamming.h" -#include "gower.h" -#include "memchi2.h" -#include "memchord.h" -#include "memeuclidean.h" -#include "mempearson.h" //********************************************************************************************************************** vector SummarySharedCommand::setParameters(){ @@ -56,7 +15,7 @@ vector SummarySharedCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance); - CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc); CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); @@ -89,7 +48,7 @@ string SummarySharedCommand::getHelpString(){ helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"; helpString += "If you use sharedchao and run into memory issues, set all to false. \n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"; - helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n"; + helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n"; return helpString; } catch(exception& e) { @@ -119,6 +78,7 @@ SummarySharedCommand::SummarySharedCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { vector myArray = setParameters(); @@ -160,7 +120,7 @@ SummarySharedCommand::SummarySharedCommand(string option) { sharedfile = m->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } - } + }else { m->setSharedFile(sharedfile); } //if the user changes the output directory command factory will send this info to us in the output parameter @@ -183,12 +143,17 @@ SummarySharedCommand::SummarySharedCommand(string option) { if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } } m->splitAtDash(calc, Estimators); + if (m->inUsersGroups("citation", Estimators)) { + ValidCalculators validCalc; validCalc.printCitations(Estimators); + //remove citation from list of calcs + for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } + } groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; } @@ -199,7 +164,7 @@ SummarySharedCommand::SummarySharedCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); if (abort == false) { @@ -358,23 +323,23 @@ int SummarySharedCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } //close files and clean up - remove(outputFileName.c_str()); - if (mult == true) { remove(outAllFileName.c_str()); } + m->mothurRemove(outputFileName); + if (mult == true) { m->mothurRemove(outAllFileName); } return 0; //if you only have 2 groups you don't need a .sharedmultiple file }else if ((lookup.size() == 2) && (mult == true)) { mult = false; - remove(outAllFileName.c_str()); + m->mothurRemove(outAllFileName); outputNames.pop_back(); } if (m->control_pressed) { - if (mult) { remove(outAllFileName.c_str()); } - remove(outputFileName.c_str()); + if (mult) { m->mothurRemove(outAllFileName); } + m->mothurRemove(outputFileName); delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } /******************************************************/ @@ -382,7 +347,7 @@ int SummarySharedCommand::execute(){ /******************************************************/ //comparison breakup to be used by different processes later - numGroups = m->Groups.size(); + numGroups = m->getNumGroups(); lines.resize(processors); for (int i = 0; i < processors; i++) { lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); @@ -397,12 +362,12 @@ int SummarySharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { - if (mult) { remove(outAllFileName.c_str()); } - remove(outputFileName.c_str()); + if (mult) { m->mothurRemove(outAllFileName); } + m->mothurRemove(outputFileName); delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } @@ -440,11 +405,11 @@ int SummarySharedCommand::execute(){ } if (m->control_pressed) { - if (mult) { remove(outAllFileName.c_str()); } - remove(outputFileName.c_str()); + if (mult) { m->mothurRemove(outAllFileName); } + m->mothurRemove(outputFileName); delete input; for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } @@ -473,14 +438,14 @@ int SummarySharedCommand::execute(){ //reset groups parameter - m->Groups.clear(); + m->clearGroups(); for(int i=0;icontrol_pressed) { - remove(outAllFileName.c_str()); - remove(outputFileName.c_str()); + m->mothurRemove(outAllFileName); + m->mothurRemove(outputFileName); return 0; } @@ -502,152 +467,209 @@ int SummarySharedCommand::execute(){ /***********************************************************/ int SummarySharedCommand::process(vector thisLookup, string sumFileName, string sumAllFileName) { try { - vector< vector > calcDists; //vector containing vectors that contains the summary results for each group compare - calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists); - m->appendFiles((sumFileName + ".temp"), sumFileName); - remove((sumFileName + ".temp").c_str()); - if (mult) { - m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); - remove((sumAllFileName + ".temp").c_str()); - } - }else{ - int process = 1; - vector processIDS; - - //loop through and create all the processes you want - while (process != processors) { - int pid = fork(); - - if (pid > 0) { - processIDS.push_back(pid); - process++; - }else if (pid == 0){ - driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); - - //only do this if you want a distance file - if (createPhylip) { - string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist"; - ofstream outtemp; - m->openOutputFile(tempdistFileName, outtemp); - - for (int i = 0; i < calcDists.size(); i++) { - outtemp << calcDists[i].size() << endl; - - for (int j = 0; j < calcDists[i].size(); j++) { - outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; - } - } - outtemp.close(); - } - - exit(0); - }else { - m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); - for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } - exit(0); - } - } - - //parent do your part - driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); - m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName); - remove((sumFileName + toString(getpid()) + ".temp").c_str()); - if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); } - - //force parent to wait until all the processes are done - for (int i = 0; i < processIDS.size(); i++) { - int temp = processIDS[i]; - wait(&temp); - } - - for (int i = 0; i < processIDS.size(); i++) { - m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); - remove((sumFileName + toString(processIDS[i]) + ".temp").c_str()); - if (mult) { remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str()); } - - if (createPhylip) { - string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist"; - ifstream intemp; - m->openInputFile(tempdistFileName, intemp); - - for (int i = 0; i < calcDists.size(); i++) { - int size = 0; - intemp >> size; m->gobble(intemp); - - for (int j = 0; j < size; j++) { - int seq1 = 0; - int seq2 = 0; - float dist = 1.0; - - intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); - - seqDist tempDist(seq1, seq2, dist); - calcDists[i].push_back(tempDist); - } - } - intemp.close(); - remove(tempdistFileName.c_str()); - } - } + vector< vector > calcDists; //vector containing vectors that contains the summary results for each group compare + calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files + + + if(processors == 1){ + driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists); + m->appendFiles((sumFileName + ".temp"), sumFileName); + m->mothurRemove((sumFileName + ".temp")); + if (mult) { + m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); + m->mothurRemove((sumAllFileName + ".temp")); + } + }else{ + + int process = 1; + vector processIDS; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); + process++; + }else if (pid == 0){ + driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); + + //only do this if you want a distance file + if (createPhylip) { + string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist"; + ofstream outtemp; + m->openOutputFile(tempdistFileName, outtemp); + + for (int i = 0; i < calcDists.size(); i++) { + outtemp << calcDists[i].size() << endl; + + for (int j = 0; j < calcDists[i].size(); j++) { + outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; + } + } + outtemp.close(); + } + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //parent do your part + driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); + m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + toString(getpid()) + ".temp")); + if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); } + + //force parent to wait until all the processes are done + for (int i = 0; i < processIDS.size(); i++) { + int temp = processIDS[i]; + wait(&temp); + } + + for (int i = 0; i < processIDS.size(); i++) { + m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp")); + if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); } + + if (createPhylip) { + string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist"; + ifstream intemp; + m->openInputFile(tempdistFileName, intemp); + + for (int k = 0; k < calcDists.size(); k++) { + int size = 0; + intemp >> size; m->gobble(intemp); + + for (int j = 0; j < size; j++) { + int seq1 = 0; + int seq2 = 0; + float dist = 1.0; + + intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); + + seqDist tempDist(seq1, seq2, dist); + calcDists[k].push_back(tempDist); + } + } + intemp.close(); + m->mothurRemove(tempdistFileName); + } + } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to pass results vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// - } - #else - driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists); - m->appendFiles((sumFileName + ".temp"), sumFileName); - remove((sumFileName + ".temp").c_str()); - if (mult) { - m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); - remove((sumAllFileName + ".temp").c_str()); - } - #endif - - if (createPhylip) { - for (int i = 0; i < calcDists.size(); i++) { - if (m->control_pressed) { break; } + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i newLookup; + for (int k = 0; k < thisLookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisLookup[k]->getLabel()); + temp->setGroup(thisLookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thisLookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); } + } + + // Allocate memory for thread data. + summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup); + pDataArray.push_back(tempSum); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //parent do your part + driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists); + m->appendFiles((sumFileName + "0.temp"), sumFileName); + m->mothurRemove((sumFileName + "0.temp")); + if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); } + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp")); + + for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } + + if (createPhylip) { + for (int k = 0; k < calcDists.size(); k++) { + int size = pDataArray[i]->calcDists[k].size(); + for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); } + } + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + +#endif + } + + if (createPhylip) { + for (int i = 0; i < calcDists.size(); i++) { + if (m->control_pressed) { break; } - string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; - outputNames.push_back(distFileName); - ofstream outDist; - m->openOutputFile(distFileName, outDist); - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - - //initialize matrix - vector< vector > matrix; //square matrix to represent the distance - matrix.resize(thisLookup.size()); - for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } - - - for (int j = 0; j < calcDists[i].size(); j++) { - int row = calcDists[i][j].seq1; - int column = calcDists[i][j].seq2; - float dist = calcDists[i][j].dist; - - matrix[row][column] = dist; - matrix[column][row] = dist; - } - - //output to file - outDist << thisLookup.size() << endl; - for (int r=0; rgetGroup(); - if (name.length() < 10) { //pad with spaces to make compatible - while (name.length() < 10) { name += " "; } - } - outDist << name << '\t'; + string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; + outputNames.push_back(distFileName); + ofstream outDist; + m->openOutputFile(distFileName, outDist); + outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); + + //initialize matrix + vector< vector > matrix; //square matrix to represent the distance + matrix.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } + + + for (int j = 0; j < calcDists[i].size(); j++) { + int row = calcDists[i][j].seq1; + int column = calcDists[i][j].seq2; + float dist = calcDists[i][j].dist; + + matrix[row][column] = dist; + matrix[column][row] = dist; + } + + //output to file + outDist << thisLookup.size() << endl; + for (int r=0; rgetGroup(); + if (name.length() < 10) { //pad with spaces to make compatible + while (name.length() < 10) { name += " "; } + } + outDist << name << '\t'; - //output distances - for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; } - outDist << endl; - } - - outDist.close(); - } - } + //output distances + for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; } + outDist << endl; + } + + outDist.close(); + } + } return 0; } catch(exception& e) { @@ -727,7 +749,7 @@ int SummarySharedCommand::driver(vector thisLookup, int sta outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); - seqDist temp(l, k, (1.0 - tempdata[0])); + seqDist temp(l, k, tempdata[0]); calcDists[i].push_back(temp); } outputFileHandle << endl;