X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=metastatscommand.cpp;h=204d83e10df302ba17d3303c0bdb0a81f54536be;hp=0704f55896e835f331eda28446012e62c790904e;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=7bf9a81bba76538ecaf351ae208de3da4bf1b6dd diff --git a/metastatscommand.cpp b/metastatscommand.cpp index 0704f55..204d83e 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -8,22 +8,22 @@ */ #include "metastatscommand.h" -#include "metastats.h" #include "sharedutilities.h" + //********************************************************************************************************************** vector MetaStatsCommand::setParameters(){ try { - CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); - CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared); + CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -61,6 +61,21 @@ string MetaStatsCommand::getHelpString(){ } } //********************************************************************************************************************** +string MetaStatsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** MetaStatsCommand::MetaStatsCommand(){ try { abort = true; calledHelp = true; @@ -166,7 +181,7 @@ MetaStatsCommand::MetaStatsCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } sets = validParameter.validFile(parameters, "sets", false); @@ -177,14 +192,14 @@ MetaStatsCommand::MetaStatsCommand(string option) { string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } - convert(temp, iters); + m->mothurConvert(temp, iters); temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "0.05"; } - convert(temp, threshold); + m->mothurConvert(temp, threshold); temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); } } @@ -199,6 +214,14 @@ int MetaStatsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //just used to convert files to test metastats online + /****************************************************/ + bool convertInputToShared = false; + convertSharedToInput = false; + if (convertInputToShared) { convertToShared(sharedfile); return 0; } + /****************************************************/ + designMap = new GroupMap(designfile); designMap->readDesignMap(); @@ -214,8 +237,9 @@ int MetaStatsCommand::execute(){ //setup the pairwise comparions of sets for metastats //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; //make sure sets are all in designMap - SharedUtil* util = new SharedUtil(); - util->setGroups(Sets, designMap->namesOfGroups); + SharedUtil* util = new SharedUtil(); + vector dGroups = designMap->getNamesOfGroups(); + util->setGroups(Sets, dGroups); delete util; int numGroups = Sets.size(); @@ -230,26 +254,24 @@ int MetaStatsCommand::execute(){ //only 1 combo if (numGroups == 2) { processors = 1; } else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors != 1){ - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; + + if(processors != 1){ + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); - } - } - #endif + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ + numPairsPerProcessor = numPairs - i * numPairsPerProcessor; + } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + } //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ @@ -280,13 +302,13 @@ int MetaStatsCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -314,11 +336,11 @@ int MetaStatsCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; delete designMap; - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0;} + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -337,13 +359,13 @@ int MetaStatsCommand::execute(){ int MetaStatsCommand::process(vector& thisLookUp){ try { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ driver(0, namesOfGroupCombos.size(), thisLookUp); }else{ int process = 1; vector processIDS; - + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -369,11 +391,70 @@ int MetaStatsCommand::process(vector& thisLookUp){ int temp = processIDS[i]; wait(&temp); } - } - #else - driver(0, namesOfGroupCombos.size(), thisLookUp); - #endif + #else + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to pass results vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i newLookup; + vector designMapGroups; + for (int k = 0; k < thisLookUp.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisLookUp[k]->getLabel()); + temp->setGroup(thisLookUp[k]->getGroup()); + newLookup.push_back(temp); + designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup())); + } + + //for each bin + for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); } + } + + // Allocate memory for thread data. + metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold); + pDataArray.push_back(tempSum); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //do my part + driver(lines[0].start, lines[0].num, thisLookUp); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != (pDataArray[i]->num)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; + } + for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; } + for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) { + outputNames.push_back(pDataArray[i]->outputNames[j]); + outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]); + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + #endif + } + return 0; } @@ -390,19 +471,25 @@ int MetaStatsCommand::driver(int start, int num, vector& th for (int c = start; c < (start+num); c++) { //get set names - string setA = namesOfGroupCombos[c][0]; + string setA = namesOfGroupCombos[c][0]; string setB = namesOfGroupCombos[c][1]; - + //get filename - string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + variables["[group]"] = setA + "-" + setB; + string outputFileName = getOutputFileName("metastats",variables); outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); - int nameLength = outputFileName.length(); - char * output = new char[nameLength]; - strcpy(output, outputFileName.c_str()); + //int nameLength = outputFileName.length(); + //char * output = new char[nameLength]; + //strcpy(output, outputFileName.c_str()); //build matrix from shared rabunds - double** data; - data = new double*[thisLookUp[0]->getNumBins()]; + //double** data; + //data = new double*[thisLookUp[0]->getNumBins()]; + + vector< vector > data2; data2.resize(thisLookUp[0]->getNumBins()); vector subset; int setACount = 0; @@ -425,24 +512,32 @@ int MetaStatsCommand::driver(int start, int num, vector& th m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); outputNames.pop_back(); }else { + //fill data for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { - data[j] = new double[subset.size()]; + //data[j] = new double[subset.size()]; + data2[j].resize(subset.size(), 0.0); + for (int i = 0; i < subset.size(); i++) { - data[j][i] = (subset[i]->getAbundance(j)); + data2[j][i] = (subset[i]->getAbundance(j)); } } m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); - metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); + //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); + if (convertSharedToInput) { convertToInput(subset, outputFileName); } + m->mothurOutEndLine(); + MothurMetastats mothurMeta(threshold, iters); + mothurMeta.runMetastats(outputFileName , data2, setACount); + m->mothurOutEndLine(); m->mothurOutEndLine(); } //free memory - delete output; - for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; } - delete[] data; + //delete output; + //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++) { delete[] data[i]; } + //delete[] data; } return 0; @@ -454,3 +549,117 @@ int MetaStatsCommand::driver(int start, int num, vector& th } } //********************************************************************************************************************** +/*Metastats files look like: + 13_0 14_0 13_52 14_52 70S 71S 72S M1 M2 M3 C11 C12 C21 C15 C16 C19 C3 C4 C9 + Alphaproteobacteria 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 + Mollicutes 0 0 2 0 0 59 5 11 4 1 0 2 8 1 0 1 0 3 0 + Verrucomicrobiae 0 0 0 0 0 1 6 0 0 0 0 0 0 0 0 0 0 0 0 + Deltaproteobacteria 0 0 0 0 0 6 1 0 1 0 1 1 7 0 0 0 0 0 0 + Cyanobacteria 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + Epsilonproteobacteria 0 0 0 0 0 0 0 0 6 0 0 3 1 0 0 0 0 0 0 + Clostridia 75 65 207 226 801 280 267 210 162 197 81 120 106 148 120 94 84 98 121 + Bacilli 3 2 16 8 21 52 31 70 46 65 4 28 5 23 62 26 20 30 25 + Bacteroidetes (class) 21 25 22 64 226 193 296 172 98 55 19 149 201 85 50 76 113 92 82 + Gammaproteobacteria 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 + TM7_genera_incertae_sedis 0 0 0 0 0 0 0 0 1 0 1 2 0 2 0 0 0 0 0 + Actinobacteria (class) 1 1 1 2 0 0 0 9 3 7 1 1 1 3 1 2 1 2 3 + Betaproteobacteria 0 0 3 3 0 0 9 1 1 0 1 2 3 1 1 0 0 0 0 +*/ +//this function is just used to convert files to test the differences between the metastats version and mothurs version +int MetaStatsCommand::convertToShared(string filename) { + try { + ifstream in; + m->openInputFile(filename, in); + + string header = m->getline(in); m->gobble(in); + + vector groups = m->splitWhiteSpace(header); + vector newLookup; + for (int i = 0; i < groups.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel("0.03"); + temp->setGroup(groups[i]); + newLookup.push_back(temp); + } + + int otuCount = 0; + while (!in.eof()) { + if (m->control_pressed) { break; } + + string otuname; + in >> otuname; m->gobble(in); + otuCount++; + + for (int i = 0; i < groups.size(); i++) { + int temp; + in >> temp; m->gobble(in); + newLookup[i]->push_back(temp, groups[i]); + } + m->gobble(in); + } + in.close(); + + ofstream out; + m->openOutputFile(filename+".shared", out); + + out << "label\tgroup\tnumOTUs\t"; + + string snumBins = toString(otuCount); + for (int i = 0; i < otuCount; i++) { + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + out << binLabel << '\t'; + } + out << endl; + + for (int i = 0; i < groups.size(); i++) { + out << "0.03" << '\t' << groups[i] << '\t'; + newLookup[i]->print(out); + } + out.close(); + + cout << filename+".shared" << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "convertToShared"); + exit(1); + } +} +//********************************************************************************************************************** + +int MetaStatsCommand::convertToInput(vector& subset, string thisfilename) { + try { + ofstream out; + m->openOutputFile(thisfilename+".matrix", out); + + out << "\t"; + for (int i = 0; i < subset.size()-1; i++) { + out << subset[i]->getGroup() << '\t'; + } + out << subset[subset.size()-1]->getGroup() << endl; + + for (int i = 0; i < subset[0]->getNumBins(); i++) { + out << m->currentBinLabels[i] << '\t'; + for (int j = 0; j < subset.size()-1; j++) { + out << subset[j]->getAbundance(i) << '\t'; + } + out << subset[subset.size()-1]->getAbundance(i) << endl; + } + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "convertToInput"); + exit(1); + } +} + +//**********************************************************************************************************************