From b302d221847b504388ec044c6929e9dde42f9bb1 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 28 Feb 2012 13:44:21 -0500 Subject: [PATCH] fixed bug in windows summary.seqs and summary.qual paralellization. paralellized summary.shared for windows. --- screenseqscommand.cpp | 15 +- seqsummarycommand.cpp | 7 +- seqsummarycommand.h | 27 ++- summaryqualcommand.cpp | 20 +- summaryqualcommand.h | 27 ++- summarysharedcommand.cpp | 384 ++++++++++++++++++++------------------- summarysharedcommand.h | 221 ++++++++++++++++++++++ 7 files changed, 469 insertions(+), 232 deletions(-) diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 9494865..686bdaf 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -398,17 +398,10 @@ int ScreenSeqsCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - - //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames); - }else{ - numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); - } - //#else - // numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames); - //#endif - if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } + if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames); } + else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); } + + if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } #endif #ifdef USE_MPI diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index 37d0fdf..647334a 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -612,7 +612,7 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, string extension = ""; if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); } // Allocate memory for thread data. - seqSumData* tempSum = new seqSumData(&startPosition, &endPosition, &seqLength, &ambigBases, &longHomoPolymer, filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap); + seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap); pDataArray.push_back(tempSum); //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. @@ -630,6 +630,11 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ num += pDataArray[i]->count; + for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) { startPosition.push_back(pDataArray[i]->startPosition[k]); } + for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) { endPosition.push_back(pDataArray[i]->endPosition[k]); } + for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) { seqLength.push_back(pDataArray[i]->seqLength[k]); } + for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) { ambigBases.push_back(pDataArray[i]->ambigBases[k]); } + for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) { longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]); } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } diff --git a/seqsummarycommand.h b/seqsummarycommand.h index d37d6f4..dcae434 100644 --- a/seqsummarycommand.h +++ b/seqsummarycommand.h @@ -62,11 +62,11 @@ private: // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct seqSumData { - vector* startPosition; - vector* endPosition; - vector* seqLength; - vector* ambigBases; - vector* longHomoPolymer; + vector startPosition; + vector endPosition; + vector seqLength; + vector ambigBases; + vector longHomoPolymer; string filename; string sumFile; unsigned long long start; @@ -78,12 +78,7 @@ struct seqSumData { seqSumData(){} - seqSumData(vector* s, vector* e, vector* l, vector* a, vector* h, string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map nam) { - startPosition = s; - endPosition = e; - seqLength = l; - ambigBases = a; - longHomoPolymer = h; + seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map nam) { filename = f; sumFile = sf; m = mout; @@ -137,11 +132,11 @@ static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ //for each sequence this sequence represents for (int i = 0; i < num; i++) { - pDataArray->startPosition->push_back(current.getStartPos()); - pDataArray->endPosition->push_back(current.getEndPos()); - pDataArray->seqLength->push_back(current.getNumBases()); - pDataArray->ambigBases->push_back(current.getAmbigBases()); - pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer()); + pDataArray->startPosition.push_back(current.getStartPos()); + pDataArray->endPosition.push_back(current.getEndPos()); + pDataArray->seqLength.push_back(current.getNumBases()); + pDataArray->ambigBases.push_back(current.getAmbigBases()); + pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer()); } outSummary << current.getName() << '\t'; diff --git a/summaryqualcommand.cpp b/summaryqualcommand.cpp index 2a40e08..a0d786f 100644 --- a/summaryqualcommand.cpp +++ b/summaryqualcommand.cpp @@ -373,7 +373,7 @@ int SummaryQualCommand::createProcessesCreateSummary(vector& position, vect ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the seqSumQualData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, - //Taking advantage of shared memory to allow both threads to add info to vectors. + //Taking advantage of shared memory to pass results vectors. ////////////////////////////////////////////////////////////////////////////////////////////////////// vector pDataArray; @@ -384,12 +384,10 @@ int SummaryQualCommand::createProcessesCreateSummary(vector& position, vect for( int i=0; i& position, vect //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ numSeqs += pDataArray[i]->count; + int tempNum = pDataArray[i]->position.size(); + if (position.size() < tempNum) { position.resize(tempNum, 0); } + if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); } + if (scores.size() < tempNum) { + scores.resize(tempNum); + for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); } + } + + for (int k = 0; k < tempNum; k++) { position[k] += pDataArray[i]->position[k]; } + for (int k = 0; k < tempNum; k++) { averageQ[k] += pDataArray[i]->averageQ[k]; } + for (int k = 0; k < tempNum; k++) { for (int j = 0; j < 41; j++) { scores[k][j] += pDataArray[i]->scores[k][j]; } } + CloseHandle(hThreadArray[i]); delete pDataArray[i]; } diff --git a/summaryqualcommand.h b/summaryqualcommand.h index 1ec3cf1..d7aa39a 100644 --- a/summaryqualcommand.h +++ b/summaryqualcommand.h @@ -58,9 +58,9 @@ private: // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct seqSumQualData { - vector* position; - vector* averageQ; - vector< vector >* scores; + vector position; + vector averageQ; + vector< vector > scores; string filename, namefile; unsigned long long start; unsigned long long end; @@ -69,10 +69,7 @@ struct seqSumQualData { map nameMap; ~seqSumQualData(){} - seqSumQualData(vector* p, vector* a, vector< vector >* s, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map nam) { - position = p; - averageQ = a; - scores = s; + seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map nam) { filename = f; m = mout; start = st; @@ -122,20 +119,20 @@ static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){ vector thisScores = current.getQualityScores(); //resize to num of positions setting number of seqs with that size to 1 - if (pDataArray->position->size() < thisScores.size()) { pDataArray->position->resize(thisScores.size(), 0); } - if (pDataArray->averageQ->size() < thisScores.size()) { pDataArray->averageQ->resize(thisScores.size(), 0); } - if (pDataArray->scores->size() < thisScores.size()) { - pDataArray->scores->resize(thisScores.size()); - for (int i = 0; i < pDataArray->scores->size(); i++) { pDataArray->scores->at(i).resize(41, 0); } + if (pDataArray->position.size() < thisScores.size()) { pDataArray->position.resize(thisScores.size(), 0); } + if (pDataArray->averageQ.size() < thisScores.size()) { pDataArray->averageQ.resize(thisScores.size(), 0); } + if (pDataArray->scores.size() < thisScores.size()) { + pDataArray->scores.resize(thisScores.size()); + for (int i = 0; i < pDataArray->scores.size(); i++) { pDataArray->scores.at(i).resize(41, 0); } } //increase counts of number of seqs with this position //average is really the total, we will average in execute for (int i = 0; i < thisScores.size(); i++) { - pDataArray->position->at(i) += num; - pDataArray->averageQ->at(i) += (thisScores[i] * num); //weighting for namesfile + pDataArray->position.at(i) += num; + pDataArray->averageQ.at(i) += (thisScores[i] * num); //weighting for namesfile if (thisScores[i] > 40) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " has a quality scores of " + toString(thisScores[i]) + ", expecting values to be less than 40."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } - else { pDataArray->scores->at(i)[thisScores[i]] += num; } + else { pDataArray->scores.at(i)[thisScores[i]] += num; } } count += num; diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 7116e46..f63a2ad 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -8,47 +8,6 @@ */ #include "summarysharedcommand.h" -#include "sharedsobscollectsummary.h" -#include "sharedchao1.h" -#include "sharedace.h" -#include "sharednseqs.h" -#include "sharedjabund.h" -#include "sharedsorabund.h" -#include "sharedjclass.h" -#include "sharedsorclass.h" -#include "sharedjest.h" -#include "sharedsorest.h" -#include "sharedthetayc.h" -#include "sharedthetan.h" -#include "sharedkstest.h" -#include "whittaker.h" -#include "sharedochiai.h" -#include "sharedanderbergs.h" -#include "sharedkulczynski.h" -#include "sharedkulczynskicody.h" -#include "sharedlennon.h" -#include "sharedmorisitahorn.h" -#include "sharedbraycurtis.h" -#include "sharedjackknife.h" -#include "whittaker.h" -#include "odum.h" -#include "canberra.h" -#include "structeuclidean.h" -#include "structchord.h" -#include "hellinger.h" -#include "manhattan.h" -#include "structpearson.h" -#include "soergel.h" -#include "spearman.h" -#include "structkulczynski.h" -#include "structchi2.h" -#include "speciesprofile.h" -#include "hamming.h" -#include "gower.h" -#include "memchi2.h" -#include "memchord.h" -#include "memeuclidean.h" -#include "mempearson.h" //********************************************************************************************************************** vector SummarySharedCommand::setParameters(){ @@ -508,152 +467,209 @@ int SummarySharedCommand::execute(){ /***********************************************************/ int SummarySharedCommand::process(vector thisLookup, string sumFileName, string sumAllFileName) { try { - vector< vector > calcDists; //vector containing vectors that contains the summary results for each group compare - calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists); - m->appendFiles((sumFileName + ".temp"), sumFileName); - m->mothurRemove((sumFileName + ".temp")); - if (mult) { - m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); - m->mothurRemove((sumAllFileName + ".temp")); - } - }else{ - int process = 1; - vector processIDS; - - //loop through and create all the processes you want - while (process != processors) { - int pid = fork(); - - if (pid > 0) { - processIDS.push_back(pid); - process++; - }else if (pid == 0){ - driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); - - //only do this if you want a distance file - if (createPhylip) { - string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist"; - ofstream outtemp; - m->openOutputFile(tempdistFileName, outtemp); - - for (int i = 0; i < calcDists.size(); i++) { - outtemp << calcDists[i].size() << endl; - - for (int j = 0; j < calcDists[i].size(); j++) { - outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; - } - } - outtemp.close(); - } - - exit(0); - }else { - m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); - for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } - exit(0); - } - } - - //parent do your part - driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); - m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName); - m->mothurRemove((sumFileName + toString(getpid()) + ".temp")); - if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); } - - //force parent to wait until all the processes are done - for (int i = 0; i < processIDS.size(); i++) { - int temp = processIDS[i]; - wait(&temp); - } - - for (int i = 0; i < processIDS.size(); i++) { - m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); - m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp")); - if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); } - - if (createPhylip) { - string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist"; - ifstream intemp; - m->openInputFile(tempdistFileName, intemp); - - for (int k = 0; k < calcDists.size(); k++) { - int size = 0; - intemp >> size; m->gobble(intemp); - - for (int j = 0; j < size; j++) { - int seq1 = 0; - int seq2 = 0; - float dist = 1.0; - - intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); - - seqDist tempDist(seq1, seq2, dist); - calcDists[k].push_back(tempDist); - } - } - intemp.close(); - m->mothurRemove(tempdistFileName); - } - } + vector< vector > calcDists; //vector containing vectors that contains the summary results for each group compare + calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files + + + if(processors == 1){ + driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists); + m->appendFiles((sumFileName + ".temp"), sumFileName); + m->mothurRemove((sumFileName + ".temp")); + if (mult) { + m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); + m->mothurRemove((sumAllFileName + ".temp")); + } + }else{ + + int process = 1; + vector processIDS; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); + process++; + }else if (pid == 0){ + driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); + + //only do this if you want a distance file + if (createPhylip) { + string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist"; + ofstream outtemp; + m->openOutputFile(tempdistFileName, outtemp); + + for (int i = 0; i < calcDists.size(); i++) { + outtemp << calcDists[i].size() << endl; + + for (int j = 0; j < calcDists[i].size(); j++) { + outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; + } + } + outtemp.close(); + } + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //parent do your part + driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); + m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + toString(getpid()) + ".temp")); + if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); } + + //force parent to wait until all the processes are done + for (int i = 0; i < processIDS.size(); i++) { + int temp = processIDS[i]; + wait(&temp); + } + + for (int i = 0; i < processIDS.size(); i++) { + m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp")); + if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp")); } + + if (createPhylip) { + string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) + ".dist"; + ifstream intemp; + m->openInputFile(tempdistFileName, intemp); + + for (int k = 0; k < calcDists.size(); k++) { + int size = 0; + intemp >> size; m->gobble(intemp); + + for (int j = 0; j < size; j++) { + int seq1 = 0; + int seq2 = 0; + float dist = 1.0; + + intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); + + seqDist tempDist(seq1, seq2, dist); + calcDists[k].push_back(tempDist); + } + } + intemp.close(); + m->mothurRemove(tempdistFileName); + } + } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to pass results vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// - } - #else - driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists); - m->appendFiles((sumFileName + ".temp"), sumFileName); - m->mothurRemove((sumFileName + ".temp")); - if (mult) { - m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); - m->mothurRemove((sumAllFileName + ".temp")); - } - #endif - - if (createPhylip) { - for (int i = 0; i < calcDists.size(); i++) { - if (m->control_pressed) { break; } + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i newLookup; + for (int k = 0; k < thisLookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisLookup[k]->getLabel()); + temp->setGroup(thisLookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thisLookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); } + } + + // Allocate memory for thread data. + summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup); + pDataArray.push_back(tempSum); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //parent do your part + driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists); + m->appendFiles((sumFileName + "0.temp"), sumFileName); + m->mothurRemove((sumFileName + "0.temp")); + if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); } + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp")); + + for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } + + if (createPhylip) { + for (int k = 0; k < calcDists.size(); k++) { + int size = pDataArray[i]->calcDists[k].size(); + for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); } + } + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + +#endif + } + + if (createPhylip) { + for (int i = 0; i < calcDists.size(); i++) { + if (m->control_pressed) { break; } - string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; - outputNames.push_back(distFileName); - ofstream outDist; - m->openOutputFile(distFileName, outDist); - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - - //initialize matrix - vector< vector > matrix; //square matrix to represent the distance - matrix.resize(thisLookup.size()); - for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } - - - for (int j = 0; j < calcDists[i].size(); j++) { - int row = calcDists[i][j].seq1; - int column = calcDists[i][j].seq2; - float dist = calcDists[i][j].dist; - - matrix[row][column] = dist; - matrix[column][row] = dist; - } + string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; + outputNames.push_back(distFileName); + ofstream outDist; + m->openOutputFile(distFileName, outDist); + outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); + + //initialize matrix + vector< vector > matrix; //square matrix to represent the distance + matrix.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } + + + for (int j = 0; j < calcDists[i].size(); j++) { + int row = calcDists[i][j].seq1; + int column = calcDists[i][j].seq2; + float dist = calcDists[i][j].dist; + + matrix[row][column] = dist; + matrix[column][row] = dist; + } + + //output to file + outDist << thisLookup.size() << endl; + for (int r=0; rgetGroup(); + if (name.length() < 10) { //pad with spaces to make compatible + while (name.length() < 10) { name += " "; } + } + outDist << name << '\t'; - //output to file - outDist << thisLookup.size() << endl; - for (int r=0; rgetGroup(); - if (name.length() < 10) { //pad with spaces to make compatible - while (name.length() < 10) { name += " "; } - } - outDist << name << '\t'; - - //output distances - for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; } - outDist << endl; - } - - outDist.close(); - } - } + //output distances + for (int l = 0; l < r; l++) { outDist << matrix[r][l] << '\t'; } + outDist << endl; + } + + outDist.close(); + } + } return 0; } catch(exception& e) { diff --git a/summarysharedcommand.h b/summarysharedcommand.h index f366d0f..2ffe90d 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -15,6 +15,47 @@ #include "inputdata.h" #include "calculator.h" #include "validcalculator.h" +#include "sharedsobscollectsummary.h" +#include "sharedchao1.h" +#include "sharedace.h" +#include "sharednseqs.h" +#include "sharedjabund.h" +#include "sharedsorabund.h" +#include "sharedjclass.h" +#include "sharedsorclass.h" +#include "sharedjest.h" +#include "sharedsorest.h" +#include "sharedthetayc.h" +#include "sharedthetan.h" +#include "sharedkstest.h" +#include "whittaker.h" +#include "sharedochiai.h" +#include "sharedanderbergs.h" +#include "sharedkulczynski.h" +#include "sharedkulczynskicody.h" +#include "sharedlennon.h" +#include "sharedmorisitahorn.h" +#include "sharedbraycurtis.h" +#include "sharedjackknife.h" +#include "whittaker.h" +#include "odum.h" +#include "canberra.h" +#include "structeuclidean.h" +#include "structchord.h" +#include "hellinger.h" +#include "manhattan.h" +#include "structpearson.h" +#include "soergel.h" +#include "spearman.h" +#include "structkulczynski.h" +#include "structchi2.h" +#include "speciesprofile.h" +#include "hamming.h" +#include "gower.h" +#include "memchi2.h" +#include "memchord.h" +#include "memeuclidean.h" +#include "mempearson.h" class SummarySharedCommand : public Command { @@ -55,4 +96,184 @@ private: }; +/**************************************************************************************************/ +//custom data structure for threads to use. +//main process handling the calcs that can do more than 2 groups +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct summarySharedData { + vector thisLookup; + vector< vector > calcDists; + vector Estimators; + unsigned long long start; + unsigned long long end; + MothurOut* m; + string sumFile; + + summarySharedData(){} + summarySharedData(string sf, MothurOut* mout, unsigned long long st, unsigned long long en, vector est, vector lu) { + sumFile = sf; + m = mout; + start = st; + end = en; + Estimators = est; + thisLookup = lu; + } +}; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MySummarySharedThreadFunction(LPVOID lpParam){ + summarySharedData* pDataArray; + pDataArray = (summarySharedData*)lpParam; + + try { + + vector sumCalculators; + ValidCalculators validCalculator; + for (int i=0; iEstimators.size(); i++) { + if (validCalculator.isValidCalculator("sharedsummary", pDataArray->Estimators[i]) == true) { + if (pDataArray->Estimators[i] == "sharedsobs") { + sumCalculators.push_back(new SharedSobsCS()); + }else if (pDataArray->Estimators[i] == "sharedchao") { + sumCalculators.push_back(new SharedChao1()); + }else if (pDataArray->Estimators[i] == "sharedace") { + sumCalculators.push_back(new SharedAce()); + }else if (pDataArray->Estimators[i] == "jabund") { + sumCalculators.push_back(new JAbund()); + }else if (pDataArray->Estimators[i] == "sorabund") { + sumCalculators.push_back(new SorAbund()); + }else if (pDataArray->Estimators[i] == "jclass") { + sumCalculators.push_back(new Jclass()); + }else if (pDataArray->Estimators[i] == "sorclass") { + sumCalculators.push_back(new SorClass()); + }else if (pDataArray->Estimators[i] == "jest") { + sumCalculators.push_back(new Jest()); + }else if (pDataArray->Estimators[i] == "sorest") { + sumCalculators.push_back(new SorEst()); + }else if (pDataArray->Estimators[i] == "thetayc") { + sumCalculators.push_back(new ThetaYC()); + }else if (pDataArray->Estimators[i] == "thetan") { + sumCalculators.push_back(new ThetaN()); + }else if (pDataArray->Estimators[i] == "kstest") { + sumCalculators.push_back(new KSTest()); + }else if (pDataArray->Estimators[i] == "sharednseqs") { + sumCalculators.push_back(new SharedNSeqs()); + }else if (pDataArray->Estimators[i] == "ochiai") { + sumCalculators.push_back(new Ochiai()); + }else if (pDataArray->Estimators[i] == "anderberg") { + sumCalculators.push_back(new Anderberg()); + }else if (pDataArray->Estimators[i] == "kulczynski") { + sumCalculators.push_back(new Kulczynski()); + }else if (pDataArray->Estimators[i] == "kulczynskicody") { + sumCalculators.push_back(new KulczynskiCody()); + }else if (pDataArray->Estimators[i] == "lennon") { + sumCalculators.push_back(new Lennon()); + }else if (pDataArray->Estimators[i] == "morisitahorn") { + sumCalculators.push_back(new MorHorn()); + }else if (pDataArray->Estimators[i] == "braycurtis") { + sumCalculators.push_back(new BrayCurtis()); + }else if (pDataArray->Estimators[i] == "whittaker") { + sumCalculators.push_back(new Whittaker()); + }else if (pDataArray->Estimators[i] == "odum") { + sumCalculators.push_back(new Odum()); + }else if (pDataArray->Estimators[i] == "canberra") { + sumCalculators.push_back(new Canberra()); + }else if (pDataArray->Estimators[i] == "structeuclidean") { + sumCalculators.push_back(new StructEuclidean()); + }else if (pDataArray->Estimators[i] == "structchord") { + sumCalculators.push_back(new StructChord()); + }else if (pDataArray->Estimators[i] == "hellinger") { + sumCalculators.push_back(new Hellinger()); + }else if (pDataArray->Estimators[i] == "manhattan") { + sumCalculators.push_back(new Manhattan()); + }else if (pDataArray->Estimators[i] == "structpearson") { + sumCalculators.push_back(new StructPearson()); + }else if (pDataArray->Estimators[i] == "soergel") { + sumCalculators.push_back(new Soergel()); + }else if (pDataArray->Estimators[i] == "spearman") { + sumCalculators.push_back(new Spearman()); + }else if (pDataArray->Estimators[i] == "structkulczynski") { + sumCalculators.push_back(new StructKulczynski()); + }else if (pDataArray->Estimators[i] == "speciesprofile") { + sumCalculators.push_back(new SpeciesProfile()); + }else if (pDataArray->Estimators[i] == "hamming") { + sumCalculators.push_back(new Hamming()); + }else if (pDataArray->Estimators[i] == "structchi2") { + sumCalculators.push_back(new StructChi2()); + }else if (pDataArray->Estimators[i] == "gower") { + sumCalculators.push_back(new Gower()); + }else if (pDataArray->Estimators[i] == "memchi2") { + sumCalculators.push_back(new MemChi2()); + }else if (pDataArray->Estimators[i] == "memchord") { + sumCalculators.push_back(new MemChord()); + }else if (pDataArray->Estimators[i] == "memeuclidean") { + sumCalculators.push_back(new MemEuclidean()); + }else if (pDataArray->Estimators[i] == "mempearson") { + sumCalculators.push_back(new MemPearson()); + } + } + } + + pDataArray->calcDists.resize(sumCalculators.size()); + + ofstream outputFileHandle; + pDataArray->m->openOutputFile(pDataArray->sumFile, outputFileHandle); + + vector subset; + for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare + + for (int l = 0; l < k; l++) { + + outputFileHandle << pDataArray->thisLookup[0]->getLabel() << '\t'; + + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); + + //sort groups to be alphanumeric + if (pDataArray->thisLookup[k]->getGroup() > pDataArray->thisLookup[l]->getGroup()) { + outputFileHandle << (pDataArray->thisLookup[l]->getGroup() +'\t' + pDataArray->thisLookup[k]->getGroup()) << '\t'; //print out groups + }else{ + outputFileHandle << (pDataArray->thisLookup[k]->getGroup() +'\t' + pDataArray->thisLookup[l]->getGroup()) << '\t'; //print out groups + } + + for(int i=0;igetNeedsAll()) { + //load subset with rest of lookup for those calcs that need everyone to calc for a pair + for (int w = 0; w < pDataArray->thisLookup.size(); w++) { + if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); } + } + } + + vector tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs + + if (pDataArray->m->control_pressed) { for(int i=0;iprint(outputFileHandle); + + seqDist temp(l, k, tempdata[0]); + pDataArray->calcDists[i].push_back(temp); + } + outputFileHandle << endl; + } + } + + outputFileHandle.close(); + for(int i=0;im->errorOut(e, "SummarySharedCommand", "MySummarySharedThreadFunction"); + exit(1); + } +} +#endif + + #endif -- 2.39.2