X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=metastatscommand.cpp;h=204d83e10df302ba17d3303c0bdb0a81f54536be;hp=c5d349bc77b685b67a1e3251fa70f15602bb215d;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=d2390ede25cc211f95f99e377d5654bad7e6950e diff --git a/metastatscommand.cpp b/metastatscommand.cpp index c5d349b..204d83e 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -14,16 +14,16 @@ //********************************************************************************************************************** vector MetaStatsCommand::setParameters(){ try { - CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); - CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared); + CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -61,6 +61,21 @@ string MetaStatsCommand::getHelpString(){ } } //********************************************************************************************************************** +string MetaStatsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** MetaStatsCommand::MetaStatsCommand(){ try { abort = true; calledHelp = true; @@ -199,6 +214,14 @@ int MetaStatsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //just used to convert files to test metastats online + /****************************************************/ + bool convertInputToShared = false; + convertSharedToInput = false; + if (convertInputToShared) { convertToShared(sharedfile); return 0; } + /****************************************************/ + designMap = new GroupMap(designfile); designMap->readDesignMap(); @@ -416,6 +439,9 @@ int MetaStatsCommand::process(vector& thisLookUp){ //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != (pDataArray[i]->num)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; + } for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; } for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) { outputNames.push_back(pDataArray[i]->outputNames[j]); @@ -445,11 +471,15 @@ int MetaStatsCommand::driver(int start, int num, vector& th for (int c = start; c < (start+num); c++) { //get set names - string setA = namesOfGroupCombos[c][0]; + string setA = namesOfGroupCombos[c][0]; string setB = namesOfGroupCombos[c][1]; //get filename - string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = thisLookUp[0]->getLabel(); + variables["[group]"] = setA + "-" + setB; + string outputFileName = getOutputFileName("metastats",variables); outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); //int nameLength = outputFileName.length(); //char * output = new char[nameLength]; @@ -495,6 +525,7 @@ int MetaStatsCommand::driver(int start, int num, vector& th m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount); + if (convertSharedToInput) { convertToInput(subset, outputFileName); } m->mothurOutEndLine(); MothurMetastats mothurMeta(threshold, iters); @@ -518,3 +549,117 @@ int MetaStatsCommand::driver(int start, int num, vector& th } } //********************************************************************************************************************** +/*Metastats files look like: + 13_0 14_0 13_52 14_52 70S 71S 72S M1 M2 M3 C11 C12 C21 C15 C16 C19 C3 C4 C9 + Alphaproteobacteria 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 + Mollicutes 0 0 2 0 0 59 5 11 4 1 0 2 8 1 0 1 0 3 0 + Verrucomicrobiae 0 0 0 0 0 1 6 0 0 0 0 0 0 0 0 0 0 0 0 + Deltaproteobacteria 0 0 0 0 0 6 1 0 1 0 1 1 7 0 0 0 0 0 0 + Cyanobacteria 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + Epsilonproteobacteria 0 0 0 0 0 0 0 0 6 0 0 3 1 0 0 0 0 0 0 + Clostridia 75 65 207 226 801 280 267 210 162 197 81 120 106 148 120 94 84 98 121 + Bacilli 3 2 16 8 21 52 31 70 46 65 4 28 5 23 62 26 20 30 25 + Bacteroidetes (class) 21 25 22 64 226 193 296 172 98 55 19 149 201 85 50 76 113 92 82 + Gammaproteobacteria 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 + TM7_genera_incertae_sedis 0 0 0 0 0 0 0 0 1 0 1 2 0 2 0 0 0 0 0 + Actinobacteria (class) 1 1 1 2 0 0 0 9 3 7 1 1 1 3 1 2 1 2 3 + Betaproteobacteria 0 0 3 3 0 0 9 1 1 0 1 2 3 1 1 0 0 0 0 +*/ +//this function is just used to convert files to test the differences between the metastats version and mothurs version +int MetaStatsCommand::convertToShared(string filename) { + try { + ifstream in; + m->openInputFile(filename, in); + + string header = m->getline(in); m->gobble(in); + + vector groups = m->splitWhiteSpace(header); + vector newLookup; + for (int i = 0; i < groups.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel("0.03"); + temp->setGroup(groups[i]); + newLookup.push_back(temp); + } + + int otuCount = 0; + while (!in.eof()) { + if (m->control_pressed) { break; } + + string otuname; + in >> otuname; m->gobble(in); + otuCount++; + + for (int i = 0; i < groups.size(); i++) { + int temp; + in >> temp; m->gobble(in); + newLookup[i]->push_back(temp, groups[i]); + } + m->gobble(in); + } + in.close(); + + ofstream out; + m->openOutputFile(filename+".shared", out); + + out << "label\tgroup\tnumOTUs\t"; + + string snumBins = toString(otuCount); + for (int i = 0; i < otuCount; i++) { + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + out << binLabel << '\t'; + } + out << endl; + + for (int i = 0; i < groups.size(); i++) { + out << "0.03" << '\t' << groups[i] << '\t'; + newLookup[i]->print(out); + } + out.close(); + + cout << filename+".shared" << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "convertToShared"); + exit(1); + } +} +//********************************************************************************************************************** + +int MetaStatsCommand::convertToInput(vector& subset, string thisfilename) { + try { + ofstream out; + m->openOutputFile(thisfilename+".matrix", out); + + out << "\t"; + for (int i = 0; i < subset.size()-1; i++) { + out << subset[i]->getGroup() << '\t'; + } + out << subset[subset.size()-1]->getGroup() << endl; + + for (int i = 0; i < subset[0]->getNumBins(); i++) { + out << m->currentBinLabels[i] << '\t'; + for (int j = 0; j < subset.size()-1; j++) { + out << subset[j]->getAbundance(i) << '\t'; + } + out << subset[subset.size()-1]->getAbundance(i) << endl; + } + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MetaStatsCommand", "convertToInput"); + exit(1); + } +} + +//**********************************************************************************************************************