X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=summarycommand.cpp;h=4d71c07511d711ec10baad3b94ca724cbf1e320f;hb=8f7f4fc08b8c70d9ef0f79607813dba4e926e102;hp=638119543480ffe0a8364379edfef0b184c526bf;hpb=7588ff51d365aad66f455694afb90b6fd3e6639a;p=mothur.git diff --git a/summarycommand.cpp b/summarycommand.cpp index 6381195..4d71c07 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -34,26 +34,82 @@ #include "solow.h" #include "shen.h" +//********************************************************************************************************************** +vector SummaryCommand::setParameters(){ + try { + CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist); + CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund); + CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund); + CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-np_shannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "",true,false); parameters.push_back(pcalc); + CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund); + CommandParameter psize("size", "Number", "", "0", "", "", "",false,false); parameters.push_back(psize); + CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SummaryCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string SummaryCommand::getHelpString(){ + try { + string helpString = ""; + ValidCalculators validCalculator; + helpString += "The summary.single command parameters are list, sabund, rabund, shared, label, calc, abund and groupmode. list, sabund, rabund or shared is required unless you have a valid current file.\n"; + helpString += "The summary.single command should be in the following format: \n"; + helpString += "summary.single(label=yourLabel, calc=yourEstimators).\n"; + helpString += "Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n"; + helpString += validCalculator.printCalc("summary"); + helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"; + helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n"; + helpString += "The label parameter is used to analyze specific labels in your input.\n"; + helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "SummaryCommand", "getHelpString"); + exit(1); + } +} + +//********************************************************************************************************************** +SummaryCommand::SummaryCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SummaryCommand", "SummaryCommand"); + exit(1); + } +} //********************************************************************************************************************** SummaryCommand::SummaryCommand(string option) { try { - globaldata = GlobalData::getInstance(); - abort = false; + abort = false; calledHelp = false; allLines = 1; - labels.clear(); - Estimators.clear(); - + //allow user to run help - if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string Array[] = {"label","calc","abund","size","outputdir","groupmode","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); + map::iterator it; ValidParameters validParameter; @@ -62,14 +118,95 @@ SummaryCommand::SummaryCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - //make sure the user has already run the read.otu command - if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the summary.single command."); m->mothurOutEndLine(); abort = true; } + //initialize outputTypes + vector tempOutNames; + outputTypes["summary"] = tempOutNames; - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + + it = parameters.find("rabund"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["rabund"] = inputDir + it->second; } + } + + it = parameters.find("sabund"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["sabund"] = inputDir + it->second; } + } + + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } } + + //check for required parameters + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { listfile = ""; abort = true; } + else if (listfile == "not found") { listfile = ""; } + else { format = "list"; inputfile = listfile; } + + sabundfile = validParameter.validFile(parameters, "sabund", true); + if (sabundfile == "not open") { sabundfile = ""; abort = true; } + else if (sabundfile == "not found") { sabundfile = ""; } + else { format = "sabund"; inputfile = sabundfile; } + + rabundfile = validParameter.validFile(parameters, "rabund", true); + if (rabundfile == "not open") { rabundfile = ""; abort = true; } + else if (rabundfile == "not found") { rabundfile = ""; } + else { format = "rabund"; inputfile = rabundfile; } + + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { format = "sharedfile"; inputfile = sharedfile; } + + if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { + //is there are current file available for any of these? + //give priority to shared, then list, then rabund, then sabund + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { + listfile = m->getListFile(); + if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } + else { + rabundfile = m->getRabundFile(); + if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); } + else { + sabundfile = m->getSabundFile(); + if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); + abort = true; + } + } + } + } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -79,12 +216,6 @@ SummaryCommand::SummaryCommand(string option) { if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } - - //if the user has not specified any labels use the ones from read.otu - if(label == "") { - allLines = globaldata->allLines; - labels = globaldata->labels; - } calc = validParameter.validFile(parameters, "calc", false); if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } @@ -92,6 +223,11 @@ SummaryCommand::SummaryCommand(string option) { if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } } m->splitAtDash(calc, Estimators); + if (m->inUsersGroups("citation", Estimators)) { + ValidCalculators validCalc; validCalc.printCitations(Estimators); + //remove citation from list of calcs + for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } + } string temp; temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } @@ -113,44 +249,15 @@ SummaryCommand::SummaryCommand(string option) { } //********************************************************************************************************************** -void SummaryCommand::help(){ - try { - m->mothurOut("The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n"); - m->mothurOut("The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n"); - m->mothurOut("The summary.single command parameters are label, calc, abund and groupmode. No parameters are required.\n"); - m->mothurOut("The summary.single command should be in the following format: \n"); - m->mothurOut("summary.single(label=yourLabel, calc=yourEstimators).\n"); - m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n"); - validCalculator->printCalc("summary", cout); - m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"); - m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n"); - m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n"); - } - catch(exception& e) { - m->errorOut(e, "SummaryCommand", "help"); - exit(1); - } -} - -//********************************************************************************************************************** - -SummaryCommand::~SummaryCommand(){} - -//********************************************************************************************************************** - int SummaryCommand::execute(){ try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } - vector outputNames; + if ((format != "sharedfile")) { inputFileNames.push_back(inputfile); } + else { inputFileNames = parseSharedFile(sharedfile); format = "rabund"; } - string hadShared = ""; - if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); } - else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); } - - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } + if (m->control_pressed) { return 0; } int numLines = 0; int numCols = 0; @@ -161,8 +268,7 @@ int SummaryCommand::execute(){ numCols = 0; string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary"; - globaldata->inputFileName = inputFileNames[p]; - outputNames.push_back(fileNameRoot); + outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot); if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -170,10 +276,10 @@ int SummaryCommand::execute(){ sumCalculators.clear(); - validCalculator = new ValidCalculators(); + ValidCalculators validCalculator; for (int i=0; iisValidCalculator("summary", Estimators[i]) == true) { + if (validCalculator.isValidCalculator("summary", Estimators[i]) == true) { if(Estimators[i] == "sobs"){ sumCalculators.push_back(new Sobs()); }else if(Estimators[i] == "chao"){ @@ -231,19 +337,16 @@ int SummaryCommand::execute(){ } //if the users entered no valid calculators don't execute command - if (sumCalculators.size() == 0) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } + if (sumCalculators.size() == 0) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } ofstream outputFileHandle; m->openOutputFile(fileNameRoot, outputFileHandle); outputFileHandle << "label"; - - read = new ReadOTUFile(globaldata->inputFileName); - read->read(&*globaldata); - - sabund = globaldata->sabund; + + input = new InputData(inputFileNames[p], format); + sabund = input->getSAbundVector(); string lastLabel = sabund->getLabel(); - input = globaldata->ginput; - + for(int i=0;igetCols() == 1){ outputFileHandle << '\t' << sumCalculators[i]->getName(); @@ -260,11 +363,11 @@ int SummaryCommand::execute(){ set processedLabels; set userLabels = labels; - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;icontrol_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;igetLabel()) == 1){ @@ -276,7 +379,7 @@ int SummaryCommand::execute(){ for(int i=0;i data = sumCalculators[i]->getValues(sabund); - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iprint(outputFileHandle); @@ -299,7 +402,7 @@ int SummaryCommand::execute(){ for(int i=0;i data = sumCalculators[i]->getValues(sabund); - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iprint(outputFileHandle); @@ -317,7 +420,7 @@ int SummaryCommand::execute(){ sabund = input->getSAbundVector(); } - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } + if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;i::iterator it; @@ -342,7 +445,7 @@ int SummaryCommand::execute(){ for(int i=0;i data = sumCalculators[i]->getValues(sabund); - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;isabund = NULL; delete input; globaldata->ginput = NULL; return 0; } + if (m->control_pressed) { outputFileHandle.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iprint(outputFileHandle); @@ -354,22 +457,17 @@ int SummaryCommand::execute(){ outputFileHandle.close(); - if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } for(int i=0;iginput = NULL; - delete read; - delete validCalculator; - globaldata->sabund = NULL; + delete input; for(int i=0;isetSharedFile(hadShared); globaldata->setFormat("sharedfile"); } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //create summary file containing all the groups data for each label - this function just combines the info from the files already created. - if ((hadShared != "") && (groupMode)) { outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames)); } + if ((sharedfile != "") && (groupMode)) { outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames)); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } @@ -393,12 +491,7 @@ vector SummaryCommand::parseSharedFile(string filename) { map filehandles; map::iterator it3; - - //read first line - read = new ReadOTUFile(filename); - read->read(&*globaldata); - - input = globaldata->ginput; + input = new InputData(filename, "sharedfile"); vector lookup = input->getSharedRAbundVectors(); string sharedFileRoot = m->getRootName(filename); @@ -433,9 +526,8 @@ vector SummaryCommand::parseSharedFile(string filename) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } - delete read; + delete input; - globaldata->ginput = NULL; return filenames; } @@ -449,59 +541,72 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector< try { ofstream out; - string combineFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "groups.summary"; + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups.summary"; //open combined file m->openOutputFile(combineFileName, out); //open each groups summary file string newLabel = ""; - ifstream* temp; - map filehandles; + map > files; for (int i=0; iopenInputFile(outputNames[i], *(temp)); + vector thisFilesLines; + + ifstream temp; + m->openInputFile(outputNames[i], temp); //read through first line - labels string tempLabel; if (i == 0) { //we want to save the labels to output below for (int j = 0; j < numCols+1; j++) { - *(temp) >> tempLabel; + temp >> tempLabel; if (j == 1) { newLabel += "group\t" + tempLabel + '\t'; }else{ newLabel += tempLabel + '\t'; } } - }else{ for (int j = 0; j < numCols+1; j++) { *(temp) >> tempLabel; } } + }else{ for (int j = 0; j < numCols+1; j++) { temp >> tempLabel; } } + + m->gobble(temp); + + //for each label + for (int k = 0; k < numLines; k++) { + + string thisLine = ""; + string tempLabel; + + for (int j = 0; j < numCols+1; j++) { + temp >> tempLabel; + + //save for later + if (j == 1) { thisLine += groups[i] + "\t" + tempLabel + "\t"; } + else{ thisLine += tempLabel + "\t"; } + } + + thisLine += "\n"; + + thisFilesLines.push_back(thisLine); + + m->gobble(temp); + } + + files[outputNames[i]] = thisFilesLines; - m->gobble(*(temp)); + temp.close(); + remove(outputNames[i].c_str()); } //output label line to new file out << newLabel << endl; //for each label - for (int i = 0; i < numLines; i++) { + for (int k = 0; k < numLines; k++) { //grab summary data for each group for (int i=0; i> tempLabel; - - //print to combined file - if (j == 1) { out << groups[i] << '\t' << tempLabel << '\t'; } - else{ out << tempLabel << '\t'; } - } - - out << endl; - m->gobble(*(filehandles[outputNames[i]])); + out << files[outputNames[i]][k]; } } - //close each groups summary file - for (int i=0; i