X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=parsimonycommand.cpp;h=64d249831ad56f5501dd6dd4d44f17bda35daf78;hp=1be4697a4f3fde882fa08985a8a1e01072110596;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=0e051b4cfda410b0d441da6ff2f96d4bbe1d9e5a diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 1be4697..64d2498 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -8,19 +8,21 @@ */ #include "parsimonycommand.h" +#include "treereader.h" //********************************************************************************************************************** vector ParsimonyCommand::setParameters(){ try { - CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","parsimony-psummary",false,true,true); parameters.push_back(ptree); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter prandom("random", "String", "", "", "", "", "","",false,false); parameters.push_back(prandom); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -35,7 +37,7 @@ vector ParsimonyCommand::setParameters(){ string ParsimonyCommand::getHelpString(){ try { string helpString = ""; - helpString += "The parsimony command parameters are tree, group, name, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n"; + helpString += "The parsimony command parameters are tree, group, name, count, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"; helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"; helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n"; @@ -51,7 +53,22 @@ string ParsimonyCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string ParsimonyCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "parsimony") { pattern = "[filename],parsimony"; } + else if (type == "psummary") { pattern = "[filename],psummary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ParsimonyCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** ParsimonyCommand::ParsimonyCommand(){ try { @@ -74,6 +91,7 @@ ParsimonyCommand::ParsimonyCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { vector myArray = setParameters(); @@ -122,14 +140,16 @@ ParsimonyCommand::ParsimonyCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } - m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); - m->Treenames.clear(); - m->names.clear(); - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; } @@ -138,21 +158,37 @@ ParsimonyCommand::ParsimonyCommand(string option) { if (randomtree == "") { //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); - if (treefile == "not open") { abort = true; } + if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = m->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; } - } + }else { m->setTreeFile(treefile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group", true); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } + else { m->setGroupFile(groupfile); } namefile = validParameter.validFile(parameters, "name", true); - if (namefile == "not open") { abort = true; } + if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } + else { m->setNameFile(namefile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + } //if the user changes the output directory command factory will send this info to us in the output parameter @@ -161,18 +197,25 @@ ParsimonyCommand::ParsimonyCommand(string option) { //check for optional parameter and set defaults // ...at some point should added some additional type checking... groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; m->Groups.clear(); } + if (groups == "not found") { groups = ""; m->clearGroups(); } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } - convert(itersString, iters); + m->mothurConvert(itersString, iters); string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); + + if (countfile=="") { + if (namefile == "") { + vector files; files.push_back(treefile); + parser.getNameFile(files); + } + } } @@ -195,74 +238,22 @@ int ParsimonyCommand::execute() { m->setTreeFile(treefile); - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - T = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - + TreeReader* reader; + if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); } + else { reader = new TreeReader(treefile, countfile); } + T = reader->getTrees(); + ct = T[0]->getCountTable(); + delete reader; + if(outputDir == "") { outputDir += m->hasPath(treefile); } - output = new ColumnFile(outputDir + m->getSimpleName(treefile) + ".parsimony", itersString); - outputNames.push_back(outputDir + m->getSimpleName(treefile) + ".parsimony"); - outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + ".parsimony"); + map variables; + variables["[filename]"] = outputDir + m->getSimpleName(treefile) + "."; + + output = new ColumnFile(getOutputFileName("parsimony",variables), itersString); + outputNames.push_back(getOutputFileName("parsimony",variables)); + outputTypes["parsimony"].push_back(getOutputFileName("parsimony",variables)); - sumFile = outputDir + m->getSimpleName(treefile) + ".psummary"; + sumFile = getOutputFileName("psummary",variables); m->openOutputFile(sumFile, outSum); outputNames.push_back(sumFile); outputTypes["psummary"].push_back(sumFile); @@ -276,25 +267,27 @@ int ParsimonyCommand::execute() { } //set users groups to analyze - util = new SharedUtil(); - util->setGroups(m->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze - util->getCombos(groupComb, m->Groups, numComp); - delete util; + SharedUtil util; + vector mGroups = m->getGroups(); + vector tGroups = ct->getNamesOfGroups(); + util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze + util.getCombos(groupComb, mGroups, numComp); + m->setGroups(mGroups); if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } - pars = new Parsimony(tmap); + Parsimony pars; counter = 0; Progress* reading; reading = new Progress("Comparing to random:", iters); if (m->control_pressed) { - delete reading; delete pars; delete output; - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + delete reading; delete output; + delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); + m->clearGroups(); return 0; } @@ -312,14 +305,14 @@ int ParsimonyCommand::execute() { if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { - userData = pars->getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC. + userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC. if (m->control_pressed) { - delete reading; delete pars; delete output; - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + delete reading; delete output; + delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); + m->clearGroups(); return 0; } @@ -345,20 +338,20 @@ int ParsimonyCommand::execute() { for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users - randT = new Tree(tmap); + randT = new Tree(ct); //create random relationships between nodes randT->assembleRandomTree(); //get pscore of random tree - randomData = pars->getValues(randT, processors, outputDir); + randomData = pars.getValues(randT, processors, outputDir); if (m->control_pressed) { - delete reading; delete pars; delete output; delete randT; + delete reading; delete output; delete randT; if (randomtree == "") { outSum.close(); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - m->Groups.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); + delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } + m->clearGroups(); return 0; } @@ -386,29 +379,23 @@ int ParsimonyCommand::execute() { for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users - randT = new Tree(tmap); + randT = new Tree(ct); //create random relationships between nodes randT->assembleRandomTree(); if (m->control_pressed) { - delete reading; delete pars; delete output; delete randT; - delete tmap; - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); - return 0; + delete reading; delete output; delete randT; delete ct; + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //get pscore of random tree - randomData = pars->getValues(randT, processors, outputDir); + randomData = pars.getValues(randT, processors, outputDir); if (m->control_pressed) { - delete reading; delete pars; delete output; delete randT; - delete tmap; - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); - return 0; + delete reading; delete output; delete randT; delete ct; + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } for(int r = 0; r < numComp; r++) { @@ -460,29 +447,23 @@ int ParsimonyCommand::execute() { } if (m->control_pressed) { - delete reading; delete pars; delete output; - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + delete reading; delete output; + delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //finish progress bar reading->finish(); delete reading; - printParsimonyFile(); if (randomtree == "") { printUSummaryFile(); } + + delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } - //reset groups parameter - m->Groups.clear(); - - delete pars; delete output; - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); return 0;} + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -572,7 +553,7 @@ void ParsimonyCommand::getUserInput() { try { //create treemap - tmap = new TreeMap(); + ct = new CountTable(); m->mothurOut("Please enter the number of groups you would like to analyze: "); cin >> numGroups; @@ -582,75 +563,38 @@ void ParsimonyCommand::getUserInput() { count = 1; numEachGroup.resize(numGroups, 0); + set nameMap; + map groupMap; + set gps; + for (int i = 1; i <= numGroups; i++) { m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": "); cin >> num; m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine(); - - //set tmaps seqsPerGroup - tmap->seqsPerGroup[toString(i)] = num; - tmap->namesOfGroups.push_back(toString(i)); + gps.insert(toString(i)); + //set tmaps namesOfSeqs for (int j = 0; j < num; j++) { - tmap->namesOfSeqs.push_back(toString(count)); - tmap->treemap[toString(count)].groupname = toString(i); + groupMap[toString(count)] = toString(i); + nameMap.insert(toString(count)); count++; } } - + ct->createTable(nameMap, groupMap, gps); + //clears buffer so next command doesn't have error string s; getline(cin, s); - m->Treenames = tmap->namesOfSeqs; - + m->Treenames = ct->getNamesOfSeqs(); + m->runParse = false; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "getUserInput"); exit(1); } } -/*****************************************************************/ -int ParsimonyCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = dupNames[i]; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ParsimonyCommand", "readNamesFile"); - exit(1); - } -} /***********************************************************/