X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=collectsharedcommand.cpp;h=dd1ec270334e09f1d316ce060c2ab8c185e0d313;hp=21695e328c90590572a9efebc7caf562c2525c75;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=c69e2e9749626cfbf1d6cb0125ae94f869e00b18 diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 21695e3..dd1ec27 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -50,45 +50,112 @@ #include "memeuclidean.h" #include "mempearson.h" + //********************************************************************************************************************** -vector CollectSharedCommand::getValidParameters(){ +vector CollectSharedCommand::setParameters(){ try { - string AlignArray[] = {"freq","label","calc","groups","all","outputdir","inputdir"}; - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); + CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "CollectSharedCommand", "getValidParameters"); + m->errorOut(e, "CollectSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** -vector CollectSharedCommand::getRequiredParameters(){ +string CollectSharedCommand::getHelpString(){ try { - vector myArray; - return myArray; + string helpString = ""; + ValidCalculators validCalculator; + helpString += "The collect.shared command parameters are shared, label, freq, calc and groups. shared is required if there is no current sharedfile. \n"; + helpString += "The collect.shared command should be in the following format: \n"; + helpString += "collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n"; + helpString += "Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n"; + helpString += "The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n"; + helpString += "The default value for groups is all the groups in your groupfile.\n"; + helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"; + helpString += validCalculator.printCalc("shared"); + helpString += "The label parameter is used to analyze specific labels in your input.\n"; + helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"; + helpString += "If you use sharedchao and run into memory issues, set all to false. \n"; + helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"; + helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "CollectSharedCommand", "getRequiredParameters"); + m->errorOut(e, "CollectSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** -vector CollectSharedCommand::getRequiredFiles(){ - try { - string AlignArray[] = {"shared"}; - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); - return myArray; - } - catch(exception& e) { - m->errorOut(e, "CollectSharedCommand", "getRequiredFiles"); - exit(1); - } +string CollectSharedCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "sharedchao") { pattern = "[filename],shared.chao"; } + else if (type == "sharedsobs") { pattern = "[filename],shared.sobs"; } + else if (type == "sharedace") { pattern = "[filename],shared.ace"; } + else if (type == "jabund") { pattern = "[filename],jabund"; } + else if (type == "sorabund") { pattern = "[filename],sorabund"; } + else if (type == "jclass") { pattern = "[filename],jclass"; } + else if (type == "sorclass") { pattern = "[filename],sorclass"; } + else if (type == "jest") { pattern = "[filename],jest"; } + else if (type == "sorest") { pattern = "[filename],sorest"; } + else if (type == "thetayc") { pattern = "[filename],thetayc"; } + else if (type == "thetan") { pattern = "[filename],thetan"; } + else if (type == "kstest") { pattern = "[filename],kstest"; } + else if (type == "whittaker") { pattern = "[filename],whittaker"; } + else if (type == "sharednseqs") { pattern = "[filename],shared.nseqs"; } + else if (type == "ochiai") { pattern = "[filename],ochiai"; } + else if (type == "anderberg") { pattern = "[filename],anderberg"; } + else if (type == "kulczynski") { pattern = "[filename],kulczynski"; } + else if (type == "kulczynskicody") { pattern = "[filename],kulczynskicody"; } + else if (type == "lennon") { pattern = "[filename],lennon"; } + else if (type == "morisitahorn") { pattern = "[filename],morisitahorn"; } + else if (type == "braycurtis") { pattern = "[filename],braycurtis"; } + else if (type == "odum") { pattern = "[filename],odum"; } + else if (type == "canberra") { pattern = "[filename],canberra"; } + else if (type == "structeuclidean") { pattern = "[filename],structeuclidean"; } + else if (type == "structchord") { pattern = "[filename],structchord"; } + else if (type == "hellinger") { pattern = "[filename],hellinger"; } + else if (type == "manhattan") { pattern = "[filename],manhattan"; } + else if (type == "structpearson") { pattern = "[filename],structpearson"; } + else if (type == "soergel") { pattern = "[filename],soergel"; } + else if (type == "spearman") { pattern = "[filename],spearman"; } + else if (type == "structkulczynski") { pattern = "[filename],structkulczynski";} + else if (type == "structchi2") { pattern = "[filename],structchi2"; } + else if (type == "speciesprofile") { pattern = "[filename],speciesprofile"; } + else if (type == "hamming") { pattern = "[filename],hamming"; } + else if (type == "gower") { pattern = "[filename],gower"; } + else if (type == "memchi2") { pattern = "[filename],memchi2"; } + else if (type == "memchord") { pattern = "[filename],memchord"; } + else if (type == "memeuclidean") { pattern = "[filename],memeuclidean"; } + else if (type == "mempearson") { pattern = "[filename],mempearson"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "CollectSharedCommand", "getOutputPattern"); + exit(1); + } } + //********************************************************************************************************************** CollectSharedCommand::CollectSharedCommand(){ try { abort = true; calledHelp = true; + setParameters(); vector tempOutNames; outputTypes["sharedchao"] = tempOutNames; outputTypes["sharedsobs"] = tempOutNames; @@ -106,7 +173,7 @@ CollectSharedCommand::CollectSharedCommand(){ outputTypes["sharednseqs"] = tempOutNames; outputTypes["ochiai"] = tempOutNames; outputTypes["anderberg"] = tempOutNames; - outputTypes["skulczynski"] = tempOutNames; + outputTypes["kulczynski"] = tempOutNames; outputTypes["kulczynskicody"] = tempOutNames; outputTypes["lennon"] = tempOutNames; outputTypes["morisitahorn"] = tempOutNames; @@ -139,31 +206,27 @@ CollectSharedCommand::CollectSharedCommand(){ //********************************************************************************************************************** CollectSharedCommand::CollectSharedCommand(string option) { try { - globaldata = GlobalData::getInstance(); abort = false; calledHelp = false; allLines = 1; - labels.clear(); - Estimators.clear(); - Groups.clear(); //allow user to run help - if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; calledHelp = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string Array[] = {"freq","label","calc","groups","all","outputdir","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters=parser.getParameters(); + map::iterator it; ValidParameters validParameter; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - + //initialize outputTypes vector tempOutNames; outputTypes["sharedchao"] = tempOutNames; @@ -182,7 +245,7 @@ CollectSharedCommand::CollectSharedCommand(string option) { outputTypes["sharednseqs"] = tempOutNames; outputTypes["ochiai"] = tempOutNames; outputTypes["anderberg"] = tempOutNames; - outputTypes["skulczynski"] = tempOutNames; + outputTypes["kulczynski"] = tempOutNames; outputTypes["kulczynskicody"] = tempOutNames; outputTypes["lennon"] = tempOutNames; outputTypes["morisitahorn"] = tempOutNames; @@ -206,16 +269,34 @@ CollectSharedCommand::CollectSharedCommand(string option) { outputTypes["memeuclidean"] = tempOutNames; outputTypes["mempearson"] = tempOutNames; - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } - - //make sure the user has already run the read.otu command - if (globaldata->getSharedFile() == "") { - if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; } - else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; } + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } } - + + //get shared file + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking.. @@ -226,162 +307,159 @@ CollectSharedCommand::CollectSharedCommand(string option) { else { allLines = 1; } } - //if the user has not specified any labels use the ones from read.otu - if(label == "") { - allLines = globaldata->allLines; - labels = globaldata->labels; - } - calc = validParameter.validFile(parameters, "calc", false); if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } else { if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } } m->splitAtDash(calc, Estimators); + if (m->inUsersGroups("citation", Estimators)) { + ValidCalculators validCalc; validCalc.printCitations(Estimators); + //remove citation from list of calcs + for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } + } groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); } - globaldata->Groups = Groups; + m->setGroups(Groups); string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } - convert(temp, freq); + m->mothurConvert(temp, freq); temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; } all = m->isTrue(temp); if (abort == false) { - if (outputDir == "") { outputDir += m->hasPath(globaldata->inputFileName); } - string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)); - format = globaldata->getFormat(); - int i; + string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + map variables; + variables["[filename]"] = fileNameRoot; + + ValidCalculators validCalculator; - validCalculator = new ValidCalculators(); - util = new SharedUtil(); - - for (i=0; iisValidCalculator("shared", Estimators[i]) == true) { + for (int i=0; imothurOut("The collect.shared command can only be executed after a successful read.otu command.\n"); - m->mothurOut("The collect.shared command parameters are label, freq, calc and groups. No parameters are required \n"); - m->mothurOut("The collect.shared command should be in the following format: \n"); - m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n"); - m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n"); - m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n"); - m->mothurOut("The default value for groups is all the groups in your groupfile.\n"); - m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"); - validCalculator->printCalc("shared", cout); - m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); - m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"); - m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n"); - m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n"); - - } - catch(exception& e) { - m->errorOut(e, "CollectSharedCommand", "help"); - exit(1); - } -} - -//********************************************************************************************************************** - -CollectSharedCommand::~CollectSharedCommand(){ - if (abort == false) { - delete input; globaldata->ginput = NULL; - delete read; - delete util; - delete validCalculator; - globaldata->gorder = NULL; - } -} - +CollectSharedCommand::~CollectSharedCommand(){} //********************************************************************************************************************** int CollectSharedCommand::execute(){ @@ -444,10 +486,7 @@ int CollectSharedCommand::execute(){ if (cDisplays.size() == 0) { return 0; } for(int i=0;isetAll(all); } - read = new ReadOTUFile(globaldata->inputFileName); - read->read(&*globaldata); - - input = globaldata->ginput; + input = new InputData(sharedfile, "sharedfile"); order = input->getSharedOrderVector(); string lastLabel = order->getLabel(); @@ -456,15 +495,20 @@ int CollectSharedCommand::execute(){ set userLabels = labels; //set users groups - util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect"); - util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex); + SharedUtil* util = new SharedUtil(); + Groups = m->getGroups(); + vector allGroups = m->getAllGroups(); + util->setGroups(Groups, allGroups, "collect"); + m->setGroups(Groups); + m->setAllGroups(allGroups); + delete util; while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;iGroups.clear(); + delete order; delete input; + m->clearGroups(); return 0; } @@ -509,9 +553,10 @@ int CollectSharedCommand::execute(){ } if (m->control_pressed) { - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;iGroups.clear(); + m->clearGroups(); + delete input; return 0; } @@ -539,10 +584,11 @@ int CollectSharedCommand::execute(){ delete cCurve; if (m->control_pressed) { - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;iGroups.clear(); + delete input; + m->clearGroups(); return 0; } @@ -552,7 +598,8 @@ int CollectSharedCommand::execute(){ for(int i=0;iGroups.clear(); + m->clearGroups(); + delete input; m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine();