X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=summarycommand.cpp;h=2bed467066da5033680af7a3b4a5c0cc7cf83a42;hb=HEAD;hp=85f0970f25930f563616c5fba9bc27fecda9cf98;hpb=36a6b02cf7f09d2bc34376b588944a9ca73429c5;p=mothur.git diff --git a/summarycommand.cpp b/summarycommand.cpp index 85f0970..2bed467 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -34,23 +34,25 @@ #include "solow.h" #include "shen.h" #include "subsample.h" +#include "shannonrange.h" //********************************************************************************************************************** vector SummaryCommand::setParameters(){ try { - CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist); - CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund); - CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund); - CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared); - CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "",true,false); parameters.push_back(pcalc); - CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund); - CommandParameter psize("size", "Number", "", "0", "", "", "",false,false); parameters.push_back(psize); - CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(plist); + CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(prabund); + CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(psabund); + CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(pshared); + CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); + CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); + CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); + CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -75,6 +77,7 @@ string SummaryCommand::getHelpString(){ helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n"; helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"; helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n"; + helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n"; return helpString; @@ -84,7 +87,21 @@ string SummaryCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string SummaryCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "SummaryCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** SummaryCommand::SummaryCommand(){ try { @@ -254,7 +271,12 @@ SummaryCommand::SummaryCommand(string option) { else { subsample = false; subsampleSize = -1; } } - if (subsample == false) { iters = 1; } + temp = validParameter.validFile(parameters, "alpha", false); if (temp == "not found") { temp = "1"; } + m->mothurConvert(temp, alpha); + + if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2."); m->mothurOutEndLine(); abort=true; } + + if (subsample == false) { iters = 0; } else { //if you did not set a samplesize and are not using a sharedfile if ((subsampleSize == -1) && (format != "sharedfile")) { m->mothurOut("[ERROR]: If you want to subsample with a list, rabund or sabund file, you must provide the sample size. You can do this by setting subsample=yourSampleSize.\n"); abort=true; } @@ -288,13 +310,13 @@ int SummaryCommand::execute(){ numLines = 0; numCols = 0; - string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary"; - string fileNameAve = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "ave"; - string fileNameSTD = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "std"; - outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot); - + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); + string fileNameRoot = getOutputFileName("summary",variables); + variables["[tag]"] = "ave-std"; + string fileNameAve = getOutputFileName("summary",variables); + outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot); - if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); groupIndex[fileNameRoot] = groups[p]; @@ -332,6 +354,8 @@ int SummaryCommand::execute(){ sumCalculators.push_back(new Shannon()); }else if(Estimators[i] == "shannoneven"){ sumCalculators.push_back(new ShannonEven()); + }else if(Estimators[i] == "shannonrange"){ + sumCalculators.push_back(new RangeShannon(alpha)); }else if(Estimators[i] == "npshannon"){ sumCalculators.push_back(new NPShannon()); }else if(Estimators[i] == "heip"){ @@ -369,18 +393,14 @@ int SummaryCommand::execute(){ m->openOutputFile(fileNameRoot, outputFileHandle); outputFileHandle << "label"; - ofstream outAve, outSTD; + ofstream outAve; if (subsample) { m->openOutputFile(fileNameAve, outAve); - m->openOutputFile(fileNameSTD, outSTD); - outputNames.push_back(fileNameAve); outputTypes["ave"].push_back(fileNameAve); - outputNames.push_back(fileNameSTD); outputTypes["std"].push_back(fileNameSTD); - outAve << "label"; outSTD << "label"; + outputNames.push_back(fileNameAve); outputTypes["summary"].push_back(fileNameAve); + outAve << "label\tmethod"; outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); - outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint); if (inputFileNames.size() > 1) { groupIndex[fileNameAve] = groups[p]; - groupIndex[fileNameSTD] = groups[p]; } } @@ -391,17 +411,17 @@ int SummaryCommand::execute(){ for(int i=0;igetCols() == 1){ outputFileHandle << '\t' << sumCalculators[i]->getName(); - if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); outSTD << '\t' << sumCalculators[i]->getName(); } + if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); } numCols++; } else{ outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; - if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; outSTD << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } + if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } numCols += 3; } } outputFileHandle << endl; - if (subsample) { outSTD << endl; outAve << endl; } + if (subsample) { outAve << endl; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; @@ -409,11 +429,11 @@ int SummaryCommand::execute(){ - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;igetLabel()) == 1){ @@ -421,9 +441,9 @@ int SummaryCommand::execute(){ processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel()); - process(sabund, outputFileHandle, outAve, outSTD); + process(sabund, outputFileHandle, outAve); - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;igetLabel()); userLabels.erase(sabund->getLabel()); - process(sabund, outputFileHandle, outAve, outSTD); + process(sabund, outputFileHandle, outAve); - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;igetSAbundVector(); } - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i::iterator it; @@ -473,15 +493,15 @@ int SummaryCommand::execute(){ sabund = input->getSAbundVector(lastLabel); m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); - process(sabund, outputFileHandle, outAve, outSTD); + process(sabund, outputFileHandle, outAve); - if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;icontrol_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } for(int i=0;i data -> values @@ -559,7 +579,7 @@ int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, o outputFileHandle << endl; if (subsample) { - outAve << sabund->getLabel() << '\t'; outStd << sabund->getLabel() << '\t'; + outAve << sabund->getLabel() << '\t' << "ave\t"; //find ave and std for this label and output //will need to modify the createGroupSummary to combine results and not mess with the .summary file. @@ -594,14 +614,15 @@ int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, o } } + outAve << endl << sabund->getLabel() << '\t' << "std\t"; for (int i = 0; i < stdDev.size(); i++) { //finds average. for (int j = 0; j < stdDev[i].size(); j++) { stdDev[i][j] /= (float) iters; stdDev[i][j] = sqrt(stdDev[i][j]); - outStd << stdDev[i][j] << '\t'; + outAve << stdDev[i][j] << '\t'; } } - outAve << endl; outStd << endl; + outAve << endl; } return 0; @@ -702,38 +723,37 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, //open each groups summary file vector newComboNames; - string newLabel = ""; + map > > files; + map filesTypesLabels; + map filesTypesNumLines; for (int i=0; igetExtension(outputNames[i]); - string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; - m->mothurRemove(combineFileName); //remove old file - vector thisFilesLines; ifstream temp; m->openInputFile(outputNames[i], temp); //read through first line - labels - string tempLabel; - if (i == 0) { //we want to save the labels to output below - for (int j = 0; j < numCols+1; j++) { - temp >> tempLabel; - - if (j == 1) { newLabel += "group\t" + tempLabel + '\t'; - }else{ newLabel += tempLabel + '\t'; } - } - }else{ for (int j = 0; j < numCols+1; j++) { temp >> tempLabel; } } + string labelsLine = m->getline(temp); + vector theseLabels = m->splitWhiteSpace(labelsLine); + + string newLabel = ""; + for (int j = 0; j < theseLabels.size(); j++) { + if (j == 1) { newLabel += "group\t" + theseLabels[j] + '\t'; + }else{ newLabel += theseLabels[j] + '\t'; } + } m->gobble(temp); + int stop = numLines; + if (theseLabels.size() != numCols+1) { stop = numLines*2; } //for each label - for (int k = 0; k < numLines; k++) { + for (int k = 0; k < stop; k++) { string thisLine = ""; string tempLabel; - for (int j = 0; j < numCols+1; j++) { + for (int j = 0; j < theseLabels.size(); j++) { temp >> tempLabel; //save for later @@ -748,6 +768,13 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, m->gobble(temp); } + string extension = m->getExtension(outputNames[i]); + if (theseLabels.size() != numCols+1) { extension = ".ave-std" + extension; } + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; + m->mothurRemove(combineFileName); //remove old file + filesTypesLabels[extension] = newLabel; + filesTypesNumLines[extension] = stop; + map > >::iterator itFiles = files.find(extension); if (itFiles != files.end()) { //add new files info to existing type files[extension][outputNames[i]] = thisFilesLines; @@ -775,10 +802,10 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, m->openOutputFile(combineFileName, out); //output label line to new file - out << newLabel << endl; + out << filesTypesLabels[extension] << endl; //for each label - for (int k = 0; k < numLines; k++) { + for (int k = 0; k < filesTypesNumLines[extension]; k++) { //grab summary data for each group for (map >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) {