From 2cfb747aa8f63bde9c1114001e6d2e81ccd26178 Mon Sep 17 00:00:00 2001 From: SarahsWork Date: Fri, 19 Apr 2013 09:37:01 -0400 Subject: [PATCH] paralellized unifrac.weighted for windows. added get.metacommunity command. fixed bug in sequence class filterToPos that was effecting pcr.seqs trimming. {For forward primer trimming with aligned sequences and keepdots=t. If the character before the first primer base was a base and not a gap the base was not trimmed.} modified setFilePosFasta in mothurOut class to help with large files on 32bit machines. --- getmetacommunitycommand.cpp | 319 ++++++++++++++++++++++++++++++++---- getmetacommunitycommand.h | 117 ++++++++++++- listseqscommand.cpp | 4 +- mothurout.cpp | 18 +- seqsummarycommand.cpp | 8 +- sequence.cpp | 4 +- unifracweightedcommand.cpp | 94 +++++++---- unifracweightedcommand.h | 71 ++++++++ weighted.cpp | 88 ++++++---- weighted.h | 282 +++++++++++++++++++++++++++++++ 10 files changed, 895 insertions(+), 110 deletions(-) diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp index 59efe60..ad069d0 100644 --- a/getmetacommunitycommand.cpp +++ b/getmetacommunitycommand.cpp @@ -7,7 +7,7 @@ // #include "getmetacommunitycommand.h" -#include "qFinderDMM.h" + //********************************************************************************************************************** vector GetMetaCommunityCommand::setParameters(){ @@ -16,8 +16,9 @@ vector GetMetaCommunityCommand::setParameters(){ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions); - CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions); + CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions); CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -34,12 +35,13 @@ vector GetMetaCommunityCommand::setParameters(){ string GetMetaCommunityCommand::getHelpString(){ try { string helpString = ""; - helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n"; + helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n"; helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n"; helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n"; helpString += "The minpartitions parameter is used to .... Default=5.\n"; helpString += "The maxpartitions parameter is used to .... Default=10.\n"; helpString += "The optimizegap parameter is used to .... Default=3.\n"; + helpString += "The processors parameter allows you to specify number of processors to use. The default is 1.\n"; helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n"; return helpString; } @@ -55,7 +57,7 @@ string GetMetaCommunityCommand::getOutputPattern(string type) { if (type == "fit") { pattern = "[filename],[distance],mix.fit"; } else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; } - else if (type == "design") { pattern = "[filename],mix.design"; } + else if (type == "design") { pattern = "[filename],[distance],mix.design"; } else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; } else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; } else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; } @@ -154,6 +156,10 @@ GetMetaCommunityCommand::GetMetaCommunityCommand(string option) { temp = validParameter.validFile(parameters, "optimizegap", false); if (temp == "not found"){ temp = "3"; } m->mothurConvert(temp, optimizegap); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } + m->setProcessors(temp); + m->mothurConvert(temp, processors); + string groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); } @@ -197,7 +203,7 @@ int GetMetaCommunityCommand::execute(){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - process(lookup); + createProcesses(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -210,7 +216,7 @@ int GetMetaCommunityCommand::execute(){ lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - process(lookup); + createProcesses(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -251,7 +257,7 @@ int GetMetaCommunityCommand::execute(){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - process(lookup); + createProcesses(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -270,17 +276,245 @@ int GetMetaCommunityCommand::execute(){ } } //********************************************************************************************************************** -int GetMetaCommunityCommand::process(vector& thislookup){ +int GetMetaCommunityCommand::createProcesses(vector& thislookup){ try { - double minLaplace = 1e10; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + #else + processors=1; //qFinderDMM not thread safe + #endif + + vector processIDS; + int process = 1; + int num = 0; int minPartition = 0; + + //sanity check + if (maxpartitions < processors) { processors = maxpartitions; } map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = thislookup[0]->getLabel(); string outputFileName = getOutputFileName("fit", variables); outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName); + + //divide the partitions between the processors + vector< vector > dividedPartitions; + vector< vector > rels, matrix; + vector doneFlags; + dividedPartitions.resize(processors); + rels.resize(processors); + matrix.resize(processors); + + //for each file group figure out which process will complete it + //want to divide the load intelligently so the big files are spread between processes + for (int i=1; i<=maxpartitions; i++) { + //cout << i << endl; + int processToAssign = (i+1) % processors; + if (processToAssign == 0) { processToAssign = processors; } + + if (m->debug) { m->mothurOut("[DEBUG]: assigning " + toString(i) + " to process " + toString(processToAssign-1) + "\n"); } + dividedPartitions[(processToAssign-1)].push_back(i); + + variables["[tag]"] = toString(i); + string relName = getOutputFileName("relabund", variables); + string mName = getOutputFileName("matrix", variables); + rels[(processToAssign-1)].push_back(relName); + matrix[(processToAssign-1)].push_back(mName); + } + + for (int i = 0; i < processors; i++) { //read from everyone elses, just write to yours + string tempDoneFile = toString(i) + ".done.temp"; + doneFlags.push_back(tempDoneFile); + ofstream out; + m->openOutputFile(tempDoneFile, out); //clear out + out.close(); + } + + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + outputNames.clear(); + num = processDriver(thislookup, dividedPartitions[process], (outputFileName + toString(getpid())), rels[process], matrix[process], doneFlags, process); + + //pass numSeqs to parent + ofstream out; + string tempFile = toString(getpid()) + ".outputNames.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out << outputNames.size() << endl; + for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; } + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //do my part + minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0); + + //force parent to wait until all the processes are done + for (int i=0;i tempOutputNames = outputNames; + for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { + int tempNum = 0; + in >> tempNum; m->gobble(in); + if (tempNum < minPartition) { minPartition = tempNum; } + in >> tempNum; m->gobble(in); + for (int i = 0; i < tempNum; i++) { + string tempName = ""; + in >> tempName; m->gobble(in); + tempOutputNames.push_back(tempName); + } + } + in.close(); m->mothurRemove(tempFile); + + m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName); + m->mothurRemove(outputFileName + toString(processIDS[i])); + } + + if (processors > 1) { + outputNames.clear(); + for (int i = 0; i < tempOutputNames.size(); i++) { //remove files if needed + string name = tempOutputNames[i]; + vector parts; + m->splitAtChar(name, parts, '.'); + bool keep = true; + if (((parts[parts.size()-1] == "relabund") || (parts[parts.size()-1] == "posterior")) && (parts[parts.size()-2] == "mix")) { + string tempNum = parts[parts.size()-3]; + int num; m->mothurConvert(tempNum, num); + //if (num > minPartition) { + // m->mothurRemove(tempOutputNames[i]); + // keep = false; if (m->debug) { m->mothurOut("[DEBUG]: removing " + tempOutputNames[i] + ".\n"); } + //} + } + if (keep) { outputNames.push_back(tempOutputNames[i]); } + } + + //reorder fit file + ifstream in; + m->openInputFile(outputFileName, in); + string headers = m->getline(in); m->gobble(in); + + map file; + while (!in.eof()) { + string numString, line; + int num; + in >> numString; line = m->getline(in); m->gobble(in); + m->mothurConvert(numString, num); + file[num] = line; + } + in.close(); + ofstream out; + m->openOutputFile(outputFileName, out); + out << headers << endl; + for (map::iterator it = file.begin(); it != file.end(); it++) { + out << it->first << '\t' << it->second << endl; + if (m->debug) { m->mothurOut("[DEBUG]: printing: " + toString(it->first) + '\t' + it->second + ".\n"); } + } + out.close(); + } + +#else + /* + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i newLookup; + for (int k = 0; k < thislookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thislookup[k]->getLabel()); + temp->setGroup(thislookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thislookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); } + } + + processIDS.push_back(i); + + // Allocate memory for thread data. + metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap); + pDataArray.push_back(tempMeta); + + hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); + } + + //do my part + minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; } + for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) { + outputNames.push_back(pDataArray[i]->outputNames[j]); + } + m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName); + m->mothurRemove(outputFileName + toString(processIDS[i])); + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } */ + //do my part + minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0); +#endif + for (int i = 0; i < processors; i++) { //read from everyone elses, just write to yours + string tempDoneFile = toString(i) + ".done.temp"; + m->mothurRemove(tempDoneFile); + } + + if (m->control_pressed) { return 0; } + + if (m->debug) { m->mothurOut("[DEBUG]: minPartition = " + toString(minPartition) + "\n"); } + + //run generate Summary function for smallest minPartition + variables["[tag]"] = toString(minPartition); + generateSummaryFile(minPartition, variables); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "createProcesses"); + exit(1); + } +} +//********************************************************************************************************************** +int GetMetaCommunityCommand::processDriver(vector& thislookup, vector& parts, string outputFileName, vector relabunds, vector matrix, vector doneFlags, int processID){ + try { + + double minLaplace = 1e10; + int minPartition = 0; ofstream fitData; m->openOutputFile(outputFileName, fitData); @@ -295,10 +529,26 @@ int GetMetaCommunityCommand::process(vector& thislookup){ m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; - for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){ + for(int i=0;idebug) { m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); } if (m->control_pressed) { break; } + //check to see if anyone else is done + for (int j = 0; j < doneFlags.size(); j++) { + if (!m->isBlank(doneFlags[j])) { //another process has finished + //are they done at a lower partition? + ifstream in; + m->openInputFile(doneFlags[j], in); + int tempNum; + in >> tempNum; in.close(); + if (tempNum < numPartitions) { break; } //quit, because someone else has finished + } + } + qFinderDMM findQ(sharedMatrix, numPartitions); double laplace = findQ.getLaplace(); @@ -319,43 +569,47 @@ int GetMetaCommunityCommand::process(vector& thislookup){ } m->mothurOutEndLine(); - variables["[tag]"] = toString(numPartitions); - string relabund = getOutputFileName("relabund", variables); + string relabund = relabunds[i]; outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); - string matrix = getOutputFileName("matrix", variables); - outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix); + string matrixName = matrix[i]; + outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName); - findQ.printZMatrix(matrix, m->getGroups()); + findQ.printZMatrix(matrixName, m->getGroups()); findQ.printRelAbund(relabund, m->currentBinLabels); - if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break; } + if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ + string tempDoneFile = toString(processID) + ".done.temp"; + ofstream outDone; + m->openOutputFile(tempDoneFile, outDone); + outDone << minPartition << endl; + outDone.close(); + break; + } } fitData.close(); //minPartition = 4; if (m->control_pressed) { return 0; } - - generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel()); - - return 0; + return minPartition; } catch(exception& e) { - m->errorOut(e, "GetMetaCommunityCommand", "process"); + m->errorOut(e, "GetMetaCommunityCommand", "processDriver"); exit(1); } } /**************************************************************************************************/ -vector GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){ +vector GetMetaCommunityCommand::generateDesignFile(int numPartitions, map variables){ try { vector piValues(numPartitions, 0); ifstream postFile; + variables["[tag]"] = toString(numPartitions); + string input = getOutputFileName("matrix", variables); m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input)); + variables.erase("[tag]"); string outputFileName = getOutputFileName("design", variables); ofstream designFile; m->openOutputFile(outputFileName, designFile); @@ -416,7 +670,7 @@ vector GetMetaCommunityCommand::generateDesignFile(int numPartitions, st inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; } /**************************************************************************************************/ -int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){ +int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map v){ try { vector summary; @@ -428,10 +682,17 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string refer double mean, lci, uci; - vector piValues = generateDesignFile(numPartitions, designInput); + vector piValues = generateDesignFile(numPartitions, v); ifstream referenceFile; + map variables; + variables["[filename]"] = v["[filename]"]; + variables["[distance]"] = v["[distance]"]; + variables["[tag]"] = "1"; + string reference = getOutputFileName("relabund", variables); m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str()); + variables["[tag]"] = toString(numPartitions); + string partFile = getOutputFileName("relabund", variables); ifstream partitionFile; m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str()); @@ -486,9 +747,7 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string refer sort(summary.begin(), summary.end(), summaryFunction); - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); - variables["[distance]"] = label; + variables.erase("[tag]"); string outputFileName = getOutputFileName("parameters", variables); outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName); @@ -509,7 +768,7 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string refer if (m->control_pressed) { return 0; } string summaryFileName = getOutputFileName("summary", variables); - outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + outputNames.push_back(summaryFileName); outputTypes["summary"].push_back(summaryFileName); ofstream summaryFile; m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str()); diff --git a/getmetacommunitycommand.h b/getmetacommunitycommand.h index 62070d6..2264c87 100644 --- a/getmetacommunitycommand.h +++ b/getmetacommunitycommand.h @@ -11,6 +11,7 @@ #include "command.hpp" #include "inputdata.h" +#include "qFinderDMM.h" /**************************************************************************************************/ @@ -34,17 +35,23 @@ public: void help() { m->mothurOut(getHelpString()); } private: + struct linePair { + unsigned long long start; + unsigned long long end; + linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} + }; bool abort, allLines; string outputDir; vector outputNames; string sharedfile; - int minpartitions, maxpartitions, optimizegap; + int minpartitions, maxpartitions, optimizegap, processors; vector Groups; set labels; - int process(vector&); - vector generateDesignFile(int, string); - int generateSummaryFile(int, string, string, string, string); + int processDriver(vector&, vector&, string, vector, vector, vector, int); + int createProcesses(vector&); + vector generateDesignFile(int, map); + int generateSummaryFile(int, map); }; @@ -58,6 +65,108 @@ struct summaryData { }; /**************************************************************************************************/ +struct metaCommunityData { + vector thislookup; + MothurOut* m; + string outputFileName; + vector relabunds, matrix, outputNames; + int minpartitions, maxpartitions, optimizegap; + vector parts; + int minPartition; + + metaCommunityData(){} + metaCommunityData(vector lu, MothurOut* mout, vector dp, string fit, vector rels, vector mat, int minp, int maxp, int opg) { + m = mout; + thislookup = lu; + parts = dp; + outputFileName = fit; + relabunds = rels; + matrix = mat; + minpartitions = minp; + maxpartitions = maxp; + optimizegap = opg; + minPartition = 0; + } +}; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyMetaCommunityThreadFunction(LPVOID lpParam){ + metaCommunityData* pDataArray; + pDataArray = (metaCommunityData*)lpParam; + + try { + + double minLaplace = 1e10; + + ofstream fitData; + pDataArray->m->openOutputFile(pDataArray->outputFileName, fitData); + fitData.setf(ios::fixed, ios::floatfield); + fitData.setf(ios::showpoint); + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + + vector< vector > sharedMatrix; + for (int i = 0; i < pDataArray->thislookup.size(); i++) { sharedMatrix.push_back(pDataArray->thislookup[i]->getAbundances()); } + + pDataArray->m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); + fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; + + for(int i=0;iparts.size();i++){ + + int numPartitions = pDataArray->parts[i]; + + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); } + + if (pDataArray->m->control_pressed) { break; } + + qFinderDMM* findQ = new qFinderDMM(sharedMatrix, numPartitions); + + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: done finding Q " + toString(numPartitions) + "\n"); } + + double laplace = findQ->getLaplace(); + pDataArray->m->mothurOut(toString(numPartitions) + '\t'); + cout << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t'; + pDataArray->m->mothurOutJustToLog(toString(findQ->getNLL()) + '\t' + toString(findQ->getLogDet()) + '\t'); + cout << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace; + pDataArray->m->mothurOutJustToLog(toString(findQ->getBIC()) + '\t' + toString(findQ->getAIC()) + '\t' + toString(laplace)); + + fitData << numPartitions << '\t'; + fitData << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t'; + fitData << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace << endl; + + if(laplace < minLaplace){ + pDataArray->minPartition = numPartitions; + minLaplace = laplace; + pDataArray->m->mothurOut("***"); + } + pDataArray->m->mothurOutEndLine(); + + pDataArray->outputNames.push_back(pDataArray->relabunds[i]); + pDataArray->outputNames.push_back(pDataArray->matrix[i]); + + findQ->printZMatrix(pDataArray->matrix[i], pDataArray->m->getGroups()); + findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentBinLabels); + + if(pDataArray->optimizegap != -1 && (numPartitions - pDataArray->minPartition) >= pDataArray->optimizegap && numPartitions >= pDataArray->minpartitions){ break; } + + delete findQ; + } + fitData.close(); + + //minPartition = 4; + + if (pDataArray->m->control_pressed) { return 0; } + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "GetMetaCommunityCommand", "MyMetaCommunityThreadFunction"); + exit(1); + } +} +#endif diff --git a/listseqscommand.cpp b/listseqscommand.cpp index 8a1effe..d10dfb4 100644 --- a/listseqscommand.cpp +++ b/listseqscommand.cpp @@ -297,7 +297,7 @@ int ListSeqsCommand::readFasta(){ //ofstream out; //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta"; //m->openOutputFile(newFastaName, out); - //int count = 1; + int count = 1; //string lastName = ""; while(!in.eof()){ @@ -310,7 +310,7 @@ int ListSeqsCommand::readFasta(){ if (name != "") { names.push_back(name); } m->gobble(in); - //count++; + if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + currSeq.getName() + "\n"; } } in.close(); //out.close(); diff --git a/mothurout.cpp b/mothurout.cpp index 8892feb..0efa551 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1296,15 +1296,15 @@ vector MothurOut::setFilePosFasta(string filename, int& num) char c = inFASTA.get(); count++; if (c == '>') { positions.push_back(count-1); - //cout << count << endl; + if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); } } } inFASTA.close(); num = positions.size(); - - /*FILE * pFile; - long size; + if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); } + FILE * pFile; + unsigned long long size; //get num bytes in file pFile = fopen (filename.c_str(),"rb"); @@ -1313,9 +1313,9 @@ vector MothurOut::setFilePosFasta(string filename, int& num) fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); - }*/ + } - unsigned long long size = positions[(positions.size()-1)]; + /*unsigned long long size = positions[(positions.size()-1)]; ifstream in; openInputFile(filename, in); @@ -1325,8 +1325,10 @@ vector MothurOut::setFilePosFasta(string filename, int& num) if(in.eof()) { break; } else { size++; } } - in.close(); - + in.close();*/ + + if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); } + positions.push_back(size); positions[0] = 0; diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index e9002bd..90a66f8 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -452,11 +452,15 @@ int SeqSummaryCommand::driverCreateSummary(vector& startPosition, vectorcontrol_pressed) { in.close(); outSummary.close(); return 1; } - + + if (m->debug) { m->mothurOut("[DEBUG]: count = " + toString(count) + "\n"); } + Sequence current(in); m->gobble(in); if (current.getName() != "") { + if (m->debug) { m->mothurOut("[DEBUG]: " + current.getName() + '\t' + toString(current.getNumBases()) + "\n"); } + int num = 1; if ((namefile != "") || (countfile != "")) { //make sure this sequence is in the namefile, else error @@ -480,6 +484,8 @@ int SeqSummaryCommand::driverCreateSummary(vector& startPosition, vectordebug) { m->mothurOut("[DEBUG]: " + current.getName() + '\t' + toString(current.getNumBases()) + "\n"); } } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) diff --git a/sequence.cpp b/sequence.cpp index d1cf729..ddc7d4c 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -683,12 +683,12 @@ int Sequence::filterToPos(int start){ if (start > aligned.length()) { start = aligned.length(); m->mothurOut("[ERROR]: start to large.\n"); } - for(int j = 0; j < start-1; j++) { + for(int j = 0; j < start; j++) { aligned[j] = '.'; } //things like ......----------AT become ................AT - for(int j = start-1; j < aligned.length(); j++) { + for(int j = start; j < aligned.length(); j++) { if (isalpha(aligned[j])) { break; } else { aligned[j] = '.'; } } diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 94ae125..81ea326 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -698,39 +698,32 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector usersS lines.clear(); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - if(processors != 1){ - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; + //breakdown work between processors + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); - } + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } + lines.push_back(linePair(startPos, numPairsPerProcessor)); } -#endif //get scores for random trees for (int j = 0; j < iters; j++) { - cout << j << endl; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - if(processors == 1){ - driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores); - }else{ +//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //if(processors == 1){ + // driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores); + // }else{ createProcesses(thisTree, namesOfGroupCombos, rScores); - } -#else - driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores); -#endif + // } +//#else + //driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores); +//#endif + if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } - //report progress - // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine(); } lines.clear(); @@ -766,12 +759,11 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector usersS int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, vector< vector >& scores) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - int process = 1; + int process = 1; vector processIDS; - EstOutput results; - + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -817,9 +809,53 @@ int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector > na in.close(); m->mothurRemove(s); } +#else + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector cts; + vector trees; - return 0; -#endif + //Create processor worker threads. + for( int i=1; icopy(ct); + Tree* copyTree = new Tree(copyCount); + copyTree->getCopy(t); + + cts.push_back(copyCount); + trees.push_back(copyTree); + + vector< vector > copyScores = rScores; + + weightedRandomData* tempweighted = new weightedRandomData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot, copyScores); + pDataArray.push_back(tempweighted); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = pDataArray[i]->start; j < (pDataArray[i]->start+pDataArray[i]->num); j++) { + scores[j].push_back(pDataArray[i]->scores[j][pDataArray[i]->scores[j].size()-1]); + } + delete cts[i]; + delete trees[i]; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + + +#endif + + return 0; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "createProcesses"); diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index 1c67c32..10cbc7b 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -80,6 +80,77 @@ class UnifracWeightedCommand : public Command { }; +/***********************************************************************/ +struct weightedRandomData { + int start; + int num; + MothurOut* m; + vector< vector > scores; + vector< vector > namesOfGroupCombos; + Tree* t; + CountTable* ct; + bool includeRoot; + + weightedRandomData(){} + weightedRandomData(MothurOut* mout, int st, int en, vector< vector > ngc, Tree* tree, CountTable* count, bool ir, vector< vector > sc) { + m = mout; + start = st; + num = en; + namesOfGroupCombos = ngc; + t = tree; + ct = count; + includeRoot = ir; + scores = sc; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyWeightedRandomThreadFunction(LPVOID lpParam){ + weightedRandomData* pDataArray; + pDataArray = (weightedRandomData*)lpParam; + try { + + Tree* randT = new Tree(pDataArray->ct); + + Weighted weighted(pDataArray->includeRoot); + + for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) { + + if (pDataArray->m->control_pressed) { return 0; } + + //initialize weighted score + string groupA = pDataArray->namesOfGroupCombos[h][0]; + string groupB = pDataArray->namesOfGroupCombos[h][1]; + + //copy T[i]'s info. + randT->getCopy(pDataArray->t); + + //create a random tree with same topology as T[i], but different labels + randT->assembleRandomUnifracTree(groupA, groupB); + + if (pDataArray->m->control_pressed) { delete randT; return 0; } + + //get wscore of random tree + EstOutput randomData = weighted.getValues(randT, groupA, groupB); + + if (pDataArray->m->control_pressed) { delete randT; return 0; } + + //save scores + pDataArray->scores[h].push_back(randomData[0]); + } + + delete randT; + + return 0; + } + catch(exception& e) { + pDataArray->m->errorOut(e, "Weighted", "MyWeightedRandomThreadFunction"); + exit(1); + } +} +#endif #endif diff --git a/weighted.cpp b/weighted.cpp index cf1291d..b0d06fb 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -36,30 +36,19 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { } } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct); - }else{ - int numPairs = namesOfGroupCombos.size(); - - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); - } + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + + data = createProcesses(t, namesOfGroupCombos, ct); + + lines.clear(); - data = createProcesses(t, namesOfGroupCombos, ct); - - lines.clear(); - } - #else - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct); - #endif - return data; } catch(exception& e) { @@ -71,11 +60,10 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, CountTable* ct) { try { + vector processIDS; + EstOutput results; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; - vector processIDS; - - EstOutput results; //loop through and create all the processes you want while (process != processors) { @@ -85,17 +73,12 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - EstOutput Myresults; Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct); - //m->mothurOut("Merging results."); m->mothurOutEndLine(); - //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp"; - m->openOutputFile(tempFile, out); out << Myresults.size() << endl; @@ -143,11 +126,48 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro in.close(); m->mothurRemove(s); } +#else + + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector cts; + vector trees; + + //Create processor worker threads. + for( int i=1; icopy(ct); + Tree* copyTree = new Tree(copyCount); + copyTree->getCopy(t); + + cts.push_back(copyCount); + trees.push_back(copyTree); + + weightedData* tempweighted = new weightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot); + pDataArray.push_back(tempweighted); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } - //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); - return results; -#endif + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); } + delete cts[i]; + delete trees[i]; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + return results; } catch(exception& e) { m->errorOut(e, "Weighted", "createProcesses"); diff --git a/weighted.h b/weighted.h index d4082fe..e97f01b 100644 --- a/weighted.h +++ b/weighted.h @@ -47,6 +47,288 @@ class Weighted : public TreeCalculator { }; /***********************************************************************/ +struct weightedData { + int start; + int num; + MothurOut* m; + EstOutput results; + vector< vector > namesOfGroupCombos; + Tree* t; + CountTable* ct; + bool includeRoot; + + + weightedData(){} + weightedData(MothurOut* mout, int st, int en, vector< vector > ngc, Tree* tree, CountTable* count, bool ir) { + m = mout; + start = st; + num = en; + namesOfGroupCombos = ngc; + t = tree; + ct = count; + includeRoot = ir; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyWeightedThreadFunction(LPVOID lpParam){ + weightedData* pDataArray; + pDataArray = (weightedData*)lpParam; + try { + map::iterator it; + vector D; + int count = 0; + map< vector, set > rootForGrouping; + map WScore; + + for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) { + + if (pDataArray->m->control_pressed) { return 0; } + + //initialize weighted score + string groupA = pDataArray->namesOfGroupCombos[h][0]; + string groupB = pDataArray->namesOfGroupCombos[h][1]; + + set validBranches; + WScore[groupA+groupB] = 0.0; + D.push_back(0.0000); //initialize a spot in D for each combination + + //adding the wieghted sums from group i + for (int j = 0; j < pDataArray->t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i + map::iterator it = pDataArray->t->tree[pDataArray->t->groupNodeInfo[groupA][j]].pcount.find(groupA); + int numSeqsInGroupI = it->second; + + //double sum = getLengthToRoot(pDataArray->t, pDataArray->t->groupNodeInfo[groupA][j], groupA, groupB); + /*************************************************************************************/ + double sum = 0.0; + int index = pDataArray->t->groupNodeInfo[groupA][j]; + + //you are a leaf + if(pDataArray->t->tree[index].getBranchLength() != -1){ sum += abs(pDataArray->t->tree[index].getBranchLength()); } + double tempTotal = 0.0; + index = pDataArray->t->tree[index].getParent(); + + vector grouping; grouping.push_back(groupA); grouping.push_back(groupB); + + rootForGrouping[grouping].insert(index); + + //while you aren't at root + while(pDataArray->t->tree[index].getParent() != -1){ + + if (pDataArray->m->control_pressed) { return 0; } + + int parent = pDataArray->t->tree[index].getParent(); + + if (pDataArray->includeRoot) { //add everyone + if(pDataArray->t->tree[index].getBranchLength() != -1){ sum += abs(pDataArray->t->tree[index].getBranchLength()); } + }else { + + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + int lc = pDataArray->t->tree[parent].getLChild(); + int rc = pDataArray->t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + itGroup = pDataArray->t->tree[sib].pcount.find(groupA); + if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; } + itGroup = pDataArray->t->tree[sib].pcount.find(groupB); + if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; } + + //if yes, I am not the root so add me + if (pcountSize != 0) { + if (pDataArray->t->tree[index].getBranchLength() != -1) { + sum += abs(pDataArray->t->tree[index].getBranchLength()) + tempTotal; + tempTotal = 0.0; + }else { + sum += tempTotal; + tempTotal = 0.0; + } + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent + if (pDataArray->t->tree[index].getBranchLength() != -1) { + tempTotal += abs(pDataArray->t->tree[index].getBranchLength()); + } + } + } + + index = parent; + } + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + + while(pDataArray->t->tree[index].getParent() != -1){ + int parent = pDataArray->t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + index = parent; + } + + /*************************************************************************************/ + double weightedSum = ((numSeqsInGroupI * sum) / (double)pDataArray->ct->getGroupCount(groupA)); + + D[count] += weightedSum; + } + + //adding the wieghted sums from group l + for (int j = 0; j < pDataArray->t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l + map::iterator it = pDataArray->t->tree[pDataArray->t->groupNodeInfo[groupB][j]].pcount.find(groupB); + int numSeqsInGroupL = it->second; + + //double sum = getLengthToRoot(pDataArray->t, pDataArray->t->groupNodeInfo[groupB][j], groupA, groupB); + /*************************************************************************************/ + double sum = 0.0; + int index = pDataArray->t->groupNodeInfo[groupB][j]; + + //you are a leaf + if(pDataArray->t->tree[index].getBranchLength() != -1){ sum += abs(pDataArray->t->tree[index].getBranchLength()); } + double tempTotal = 0.0; + index = pDataArray->t->tree[index].getParent(); + + vector grouping; grouping.push_back(groupA); grouping.push_back(groupB); + + rootForGrouping[grouping].insert(index); + + //while you aren't at root + while(pDataArray->t->tree[index].getParent() != -1){ + + if (pDataArray->m->control_pressed) { return 0; } + + int parent = pDataArray->t->tree[index].getParent(); + + if (pDataArray->includeRoot) { //add everyone + if(pDataArray->t->tree[index].getBranchLength() != -1){ sum += abs(pDataArray->t->tree[index].getBranchLength()); } + }else { + + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + int lc = pDataArray->t->tree[parent].getLChild(); + int rc = pDataArray->t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + itGroup = pDataArray->t->tree[sib].pcount.find(groupA); + if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; } + itGroup = pDataArray->t->tree[sib].pcount.find(groupB); + if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; } + + //if yes, I am not the root so add me + if (pcountSize != 0) { + if (pDataArray->t->tree[index].getBranchLength() != -1) { + sum += abs(pDataArray->t->tree[index].getBranchLength()) + tempTotal; + tempTotal = 0.0; + }else { + sum += tempTotal; + tempTotal = 0.0; + } + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent + if (pDataArray->t->tree[index].getBranchLength() != -1) { + tempTotal += abs(pDataArray->t->tree[index].getBranchLength()); + } + } + } + + index = parent; + } + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + + while(pDataArray->t->tree[index].getParent() != -1){ + int parent = pDataArray->t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + index = parent; + } + + /*************************************************************************************/ + + double weightedSum = ((numSeqsInGroupL * sum) / (double)pDataArray->ct->getGroupCount(groupB)); + + D[count] += weightedSum; + } + count++; + } + + //calculate u for the group comb + for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) { + //report progress + //pDataArray->m->mothurOut("Processing combo: " + toString(h)); pDataArray->m->mothurOutEndLine(); + + string groupA = pDataArray->namesOfGroupCombos[h][0]; + string groupB = pDataArray->namesOfGroupCombos[h][1]; + + //calculate u for the group comb + for(int i=0;it->getNumNodes();i++){ + + if (pDataArray->m->control_pressed) { return 0; } + + double u; + //int pcountSize = 0; + //does this node have descendants from groupA + it = pDataArray->t->tree[i].pcount.find(groupA); + //if it does u = # of its descendants with a certain group / total number in tree with a certain group + if (it != pDataArray->t->tree[i].pcount.end()) { + u = (double) pDataArray->t->tree[i].pcount[groupA] / (double) pDataArray->ct->getGroupCount(groupA); + }else { u = 0.00; } + + + //does this node have descendants from group l + it = pDataArray->t->tree[i].pcount.find(groupB); + + //if it does subtract their percentage from u + if (it != pDataArray->t->tree[i].pcount.end()) { + u -= (double) pDataArray->t->tree[i].pcount[groupB] / (double) pDataArray->ct->getGroupCount(groupB); + } + + if (pDataArray->includeRoot) { + if (pDataArray->t->tree[i].getBranchLength() != -1) { + u = abs(u * pDataArray->t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; + } + }else { + //if this is not the root then add it + if (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0) { + if (pDataArray->t->tree[i].getBranchLength() != -1) { + u = abs(u * pDataArray->t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; + } + } + } + } + + } + + /********************************************************/ + //calculate weighted score for the group combination + double UN; + count = 0; + for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) { + UN = (WScore[pDataArray->namesOfGroupCombos[h][0]+pDataArray->namesOfGroupCombos[h][1]] / D[count]); + if (isnan(UN) || isinf(UN)) { UN = 0; } + pDataArray->results.push_back(UN); + count++; + } + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "Weighted", "MyWeightedThreadFunction"); + exit(1); + } +} +#endif #endif -- 2.39.2