X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=summarycommand.cpp;h=70470b015684b386947056686789da5077000565;hb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6;hp=12382a48864a87363f5faae62425de574cb5d0e7;hpb=74844a60d80c6dd06e3fb02ee9b928424f9019b0;p=mothur.git diff --git a/summarycommand.cpp b/summarycommand.cpp index 12382a4..70470b0 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -14,8 +14,13 @@ #include "chao1.h" #include "bootstrap.h" #include "simpson.h" +#include "simpsoneven.h" +#include "invsimpson.h" #include "npshannon.h" #include "shannon.h" +#include "heip.h" +#include "smithwilson.h" +#include "shannoneven.h" #include "jackknife.h" #include "geom.h" #include "logsd.h" @@ -44,7 +49,7 @@ SummaryCommand::SummaryCommand(string option) { else { //valid paramters for this command - string Array[] = {"label","calc","abund","size","outputdir","inputdir"}; + string Array[] = {"label","calc","abund","size","outputdir","groupmode","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -63,7 +68,7 @@ SummaryCommand::SummaryCommand(string option) { //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; - outputDir += hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it + outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it } //check for optional parameter and set defaults @@ -71,7 +76,7 @@ SummaryCommand::SummaryCommand(string option) { label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; } else { - if(label != "all") { splitAtDash(label, labels); allLines = 0; } + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } @@ -86,7 +91,7 @@ SummaryCommand::SummaryCommand(string option) { else { if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } } - splitAtDash(calc, Estimators); + m->splitAtDash(calc, Estimators); string temp; temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } @@ -94,6 +99,10 @@ SummaryCommand::SummaryCommand(string option) { temp = validParameter.validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; } convert(temp, size); + + temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "F"; } + groupMode = m->isTrue(temp); + } } @@ -108,12 +117,13 @@ void SummaryCommand::help(){ try { m->mothurOut("The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n"); m->mothurOut("The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n"); - m->mothurOut("The summary.single command parameters are label, calc, abund. No parameters are required.\n"); + m->mothurOut("The summary.single command parameters are label, calc, abund and groupmode. No parameters are required.\n"); m->mothurOut("The summary.single command should be in the following format: \n"); m->mothurOut("summary.single(label=yourLabel, calc=yourEstimators).\n"); m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n"); validCalculator->printCalc("summary", cout); m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"); + m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=False).\n"); m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n"); } @@ -136,12 +146,21 @@ int SummaryCommand::execute(){ vector outputNames; + string hadShared = ""; if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); } - else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } + else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } + + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } + + int numLines = 0; + int numCols = 0; for (int p = 0; p < inputFileNames.size(); p++) { - string fileNameRoot = outputDir + getRootName(getSimpleName(inputFileNames[p])) + "summary"; + numLines = 0; + numCols = 0; + + string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary"; globaldata->inputFileName = inputFileNames[p]; outputNames.push_back(fileNameRoot); @@ -179,10 +198,20 @@ int SummaryCommand::execute(){ sumCalculators.push_back(new Jackknife()); }else if(Estimators[i] == "shannon"){ sumCalculators.push_back(new Shannon()); + }else if(Estimators[i] == "shannoneven"){ + sumCalculators.push_back(new ShannonEven()); }else if(Estimators[i] == "npshannon"){ sumCalculators.push_back(new NPShannon()); + }else if(Estimators[i] == "heip"){ + sumCalculators.push_back(new Heip()); + }else if(Estimators[i] == "smithwilson"){ + sumCalculators.push_back(new SmithWilson()); }else if(Estimators[i] == "simpson"){ sumCalculators.push_back(new Simpson()); + }else if(Estimators[i] == "simpsoneven"){ + sumCalculators.push_back(new SimpsonEven()); + }else if(Estimators[i] == "invsimpson"){ + sumCalculators.push_back(new InvSimpson()); }else if(Estimators[i] == "bootstrap"){ sumCalculators.push_back(new Bootstrap()); }else if (Estimators[i] == "nseqs") { @@ -202,10 +231,10 @@ int SummaryCommand::execute(){ } //if the users entered no valid calculators don't execute command - if (sumCalculators.size() == 0) { return 0; } + if (sumCalculators.size() == 0) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } ofstream outputFileHandle; - openOutputFile(fileNameRoot, outputFileHandle); + m->openOutputFile(fileNameRoot, outputFileHandle); outputFileHandle << "label"; read = new ReadOTUFile(globaldata->inputFileName); @@ -218,9 +247,11 @@ int SummaryCommand::execute(){ for(int i=0;igetCols() == 1){ outputFileHandle << '\t' << sumCalculators[i]->getName(); + numCols++; } else{ outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; + numCols += 3; } } outputFileHandle << endl; @@ -229,8 +260,12 @@ int SummaryCommand::execute(){ set processedLabels; set userLabels = labels; + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); @@ -240,13 +275,17 @@ int SummaryCommand::execute(){ outputFileHandle << sabund->getLabel(); for(int i=0;i data = sumCalculators[i]->getValues(sabund); + + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + numLines++; } - if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = sabund->getLabel(); delete sabund; @@ -259,10 +298,14 @@ int SummaryCommand::execute(){ outputFileHandle << sabund->getLabel(); for(int i=0;i data = sumCalculators[i]->getValues(sabund); + + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + numLines++; //restore real lastlabel to save below sabund->setLabel(saveLabel); @@ -274,6 +317,8 @@ int SummaryCommand::execute(){ sabund = input->getSAbundVector(); } + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } + //output error messages about any remaining user labels set::iterator it; bool needToRun = false; @@ -296,21 +341,38 @@ int SummaryCommand::execute(){ outputFileHandle << sabund->getLabel(); for(int i=0;i data = sumCalculators[i]->getValues(sabund); + + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + numLines++; delete sabund; } outputFileHandle.close(); + if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } + + delete input; globaldata->ginput = NULL; delete read; delete validCalculator; globaldata->sabund = NULL; + for(int i=0;isetSharedFile(hadShared); globaldata->setFormat("sharedfile"); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //create summary file containing all the groups data for each label - this function just combines the info from the files already created. + if ((hadShared != "") && (groupMode)) { outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames)); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -339,7 +401,7 @@ vector SummaryCommand::parseSharedFile(string filename) { input = globaldata->ginput; vector lookup = input->getSharedRAbundVectors(); - string sharedFileRoot = getRootName(filename); + string sharedFileRoot = m->getRootName(filename); //clears file before we start to write to it below for (int i=0; i SummaryCommand::parseSharedFile(string filename) { for (int i = 0; i < lookup.size(); i++) { RAbundVector rav = lookup[i]->getRAbundVector(); - openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()])); + m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()])); rav.print(*(filehandles[lookup[i]->getGroup()])); (*(filehandles[lookup[i]->getGroup()])).close(); } @@ -383,3 +445,72 @@ vector SummaryCommand::parseSharedFile(string filename) { } } //********************************************************************************************************************** +string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector outputNames) { + try { + + ofstream out; + string combineFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "groups.summary"; + + //open combined file + m->openOutputFile(combineFileName, out); + + //open each groups summary file + string newLabel = ""; + ifstream* temp; + map filehandles; + for (int i=0; iopenInputFile(outputNames[i], *(temp)); + + //read through first line - labels + string tempLabel; + if (i == 0) { //we want to save the labels to output below + for (int j = 0; j < numCols+1; j++) { + *(temp) >> tempLabel; + + if (j == 1) { newLabel += "group\t" + tempLabel + '\t'; + }else{ newLabel += tempLabel + '\t'; } + } + }else{ for (int j = 0; j < numCols+1; j++) { *(temp) >> tempLabel; } } + + m->gobble(*(temp)); + } + + //output label line to new file + out << newLabel << endl; + + //for each label + for (int i = 0; i < numLines; i++) { + + //grab summary data for each group + for (int i=0; i> tempLabel; + + //print to combined file + if (j == 1) { out << groups[i] << '\t' << tempLabel << '\t'; } + else{ out << tempLabel << '\t'; } + } + + out << endl; + m->gobble(*(filehandles[outputNames[i]])); + } + } + + //close each groups summary file + for (int i=0; ierrorOut(e, "SummaryCommand", "createGroupSummaryFile"); + exit(1); + } +} +//**********************************************************************************************************************