X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=metastatscommand.cpp;h=de03dab2a147174384d3e3ffce329c1e16fa07f5;hp=a0e6ed5d9bfe95b0fa6724aa05c8a06c4350762e;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=43ed0accfbc2852849e104ff7eccdd2c42acd4ec diff --git a/metastatscommand.cpp b/metastatscommand.cpp index a0e6ed5..de03dab 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -8,23 +8,23 @@ */ #include "metastatscommand.h" -#include "metastats.h" #include "sharedutilities.h" +#include "sharedrabundfloatvector.h" //********************************************************************************************************************** vector MetaStatsCommand::setParameters(){ try { - CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); - CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared); + CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -62,6 +62,21 @@ string MetaStatsCommand::getHelpString(){ } } //********************************************************************************************************************** +string MetaStatsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** MetaStatsCommand::MetaStatsCommand(){ try { abort = true; calledHelp = true; @@ -199,7 +214,14 @@ MetaStatsCommand::MetaStatsCommand(string option) { int MetaStatsCommand::execute(){ try { - if (abort == true) { if (calledHelp) { return 0; } return 2; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //just used to convert files to test metastats online + /****************************************************/ + bool convertInputToShared = false; + convertSharedToInput = false; + if (convertInputToShared) { convertToShared(sharedfile); return 0; } + /****************************************************/ designMap = new GroupMap(designfile); designMap->readDesignMap(); @@ -234,15 +256,14 @@ int MetaStatsCommand::execute(){ else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } if(processors != 1){ - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); + int remainingPairs = namesOfGroupCombos.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; } } @@ -417,6 +438,9 @@ int MetaStatsCommand::process(vector& thisLookUp){ //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != (pDataArray[i]->num)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; + } for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; } for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) { outputNames.push_back(pDataArray[i]->outputNames[j]); @@ -446,11 +470,15 @@ int MetaStatsCommand::driver(int start, int num, vector& th for (int c = start; c < (start+num); c++) { //get set names - string setA = namesOfGroupCombos[c][0]; + string setA = namesOfGroupCombos[c][0]; string setB = namesOfGroupCombos[c][1]; //get filename - string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + variables["[group]"] = setA + "-" + setB; + string outputFileName = getOutputFileName("metastats",variables); outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); //int nameLength = outputFileName.length(); //char * output = new char[nameLength]; @@ -483,24 +511,25 @@ int MetaStatsCommand::driver(int start, int num, vector& th m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); outputNames.pop_back(); }else { + //fill data for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) { //data[j] = new double[subset.size()]; data2[j].resize(subset.size(), 0.0); + for (int i = 0; i < subset.size(); i++) { - //data[j][i] = (subset[i]->getAbundance(j)); data2[j][i] = (subset[i]->getAbundance(j)); } } m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); + if (convertSharedToInput) { convertToInput(subset, outputFileName); } m->mothurOutEndLine(); MothurMetastats mothurMeta(threshold, iters); mothurMeta.runMetastats(outputFileName , data2, setACount); m->mothurOutEndLine(); - m->mothurOutEndLine(); } @@ -519,3 +548,121 @@ int MetaStatsCommand::driver(int start, int num, vector& th } } //********************************************************************************************************************** +/*Metastats files look like: + 13_0 14_0 13_52 14_52 70S 71S 72S M1 M2 M3 C11 C12 C21 C15 C16 C19 C3 C4 C9 + Alphaproteobacteria 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 + Mollicutes 0 0 2 0 0 59 5 11 4 1 0 2 8 1 0 1 0 3 0 + Verrucomicrobiae 0 0 0 0 0 1 6 0 0 0 0 0 0 0 0 0 0 0 0 + Deltaproteobacteria 0 0 0 0 0 6 1 0 1 0 1 1 7 0 0 0 0 0 0 + Cyanobacteria 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + Epsilonproteobacteria 0 0 0 0 0 0 0 0 6 0 0 3 1 0 0 0 0 0 0 + Clostridia 75 65 207 226 801 280 267 210 162 197 81 120 106 148 120 94 84 98 121 + Bacilli 3 2 16 8 21 52 31 70 46 65 4 28 5 23 62 26 20 30 25 + Bacteroidetes (class) 21 25 22 64 226 193 296 172 98 55 19 149 201 85 50 76 113 92 82 + Gammaproteobacteria 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 + TM7_genera_incertae_sedis 0 0 0 0 0 0 0 0 1 0 1 2 0 2 0 0 0 0 0 + Actinobacteria (class) 1 1 1 2 0 0 0 9 3 7 1 1 1 3 1 2 1 2 3 + Betaproteobacteria 0 0 3 3 0 0 9 1 1 0 1 2 3 1 1 0 0 0 0 +*/ +//this function is just used to convert files to test the differences between the metastats version and mothurs version +int MetaStatsCommand::convertToShared(string filename) { + try { + ifstream in; + m->openInputFile(filename, in); + + string header = m->getline(in); m->gobble(in); + + vector groups = m->splitWhiteSpace(header); + vector newLookup; + cout << groups.size() << endl; + for (int i = 0; i < groups.size(); i++) { + cout << "creating group " << groups[i] << endl; + SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); + temp->setLabel("0.03"); + temp->setGroup(groups[i]); + newLookup.push_back(temp); + } + + int otuCount = 0; + while (!in.eof()) { + if (m->control_pressed) { break; } + + string otuname; + in >> otuname; m->gobble(in); + otuCount++; + cout << otuname << endl; + for (int i = 0; i < groups.size(); i++) { + double temp; + in >> temp; m->gobble(in); + newLookup[i]->push_back(temp, groups[i]); + } + m->gobble(in); + } + in.close(); + + ofstream out; + m->openOutputFile(filename+".shared", out); + + out << "label\tgroup\tnumOTUs\t"; + + string snumBins = toString(otuCount); + for (int i = 0; i < otuCount; i++) { + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + out << binLabel << '\t'; + } + out << endl; + + for (int i = 0; i < groups.size(); i++) { + out << "0.03" << '\t' << groups[i] << '\t'; + newLookup[i]->print(out); + } + out.close(); + + cout << filename+".shared" << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "convertToShared"); + exit(1); + } +} +//********************************************************************************************************************** + +int MetaStatsCommand::convertToInput(vector& subset, string thisfilename) { + try { + ofstream out; + m->openOutputFile(thisfilename+".matrix", out); + + out << "\t"; + for (int i = 0; i < subset.size()-1; i++) { + out << subset[i]->getGroup() << '\t'; + } + out << subset[subset.size()-1]->getGroup() << endl; + + for (int i = 0; i < subset[0]->getNumBins(); i++) { + out << m->currentSharedBinLabels[i] << '\t'; + for (int j = 0; j < subset.size()-1; j++) { + out << subset[j]->getAbundance(i) << '\t'; + } + out << subset[subset.size()-1]->getAbundance(i) << endl; + } + out.close(); + + cout << thisfilename+".matrix" << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "convertToInput"); + exit(1); + } +} + +//**********************************************************************************************************************