From 16bea3130e36addc54e2116dfbcd02d706ebee45 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 18 Feb 2009 15:11:01 +0000 Subject: [PATCH] minor bugs fixes and added line and label options to read.otu's parselist and shared commands. --- deconvolutecommand.cpp | 4 ++++ deconvolutecommand.h | 4 ++-- engine.cpp | 6 +++++- errorchecking.cpp | 11 ++++------- fastamap.cpp | 19 +++++++++++++++++++ fastamap.h | 1 + globaldata.cpp | 12 +++++++----- helpcommand.cpp | 20 +++++++++++--------- parselistcommand.cpp | 35 ++++++++++++++++++++++------------- parsimonycommand.cpp | 22 +++++++++++++--------- parsimonycommand.h | 7 ++++--- sharedcommand.cpp | 9 +++++++-- validparameter.cpp | 2 +- 13 files changed, 100 insertions(+), 52 deletions(-) diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp index f2dfdd5..966fee0 100644 --- a/deconvolutecommand.cpp +++ b/deconvolutecommand.cpp @@ -17,8 +17,11 @@ int DeconvoluteCommand::execute() { //prepare filenames and open files filename = globaldata->getFastaFile(); outputFileName = (getRootName(filename) + "names"); + outFastafile = (getRootName(filename) + "uni_fasta"); + openInputFile(filename, in); openOutputFile(outputFileName, out); + openOutputFile(outFastafile, outFasta); //constructor reads in file and store internally fastamap = new FastaMap(); @@ -30,6 +33,7 @@ int DeconvoluteCommand::execute() { //file contains 2 columns separated by tabs. the first column is the groupname(name of first sequence found. //the second column is the list of names of identical sequences separated by ','. fastamap->print(out); + fastamap->printCondensedFasta(outFasta); return 0; } diff --git a/deconvolutecommand.h b/deconvolutecommand.h index b9b2084..2ccc20a 100644 --- a/deconvolutecommand.h +++ b/deconvolutecommand.h @@ -32,8 +32,8 @@ private: GlobalData* globaldata; FastaMap* fastamap; ifstream in; - ofstream out; - string filename, outputFileName; + ofstream out, outFasta; + string filename, outputFileName, outFastafile; }; diff --git a/engine.cpp b/engine.cpp index 1ca98b0..e251f00 100644 --- a/engine.cpp +++ b/engine.cpp @@ -75,6 +75,8 @@ bool InteractEngine::getInput(){ cout << endl << "mothur > "; getline(cin, input); + if (cin.eof()) { input = "quit()"; } + errorFree = errorCheckor->checkInput(input); if (errorFree == true) { CommandOptionParser parser(input); @@ -148,7 +150,9 @@ bool BatchEngine::getInput(){ while(quitCommandCalled == 0){ getline(inputBatchFile, input); - cout << endl << "dotur > " << input << endl; + if (inputBatchFile.eof()) { input = "quit()"; } + + cout << endl << "mothur > " << input << endl; errorFree = errorCheckor->checkInput(input); if (errorFree == true) { CommandOptionParser parser(input); diff --git a/errorchecking.cpp b/errorchecking.cpp index 7f581d9..1d60a4a 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -102,7 +102,7 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "fileroot" ) { fileroot = value; } if (parameter == "line" ) { line = value; } if (parameter == "label" ) { label = value; } - if (parameter == "randomtree" ) { randomtree = value; } + if (parameter == "random" ) { randomtree = value; } } @@ -133,7 +133,7 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "fileroot" ) { fileroot = value; } if (parameter == "line" ) { line = value; } if (parameter == "label" ) { label = value; } - if (parameter == "randomtree" ) { randomtree = value; } + if (parameter == "random" ) { randomtree = value; } } } @@ -141,9 +141,6 @@ bool ErrorCheck::checkInput(string input) { //make sure the user does not use both the line and label parameters if ((line != "") && (label != "")) { cout << "You may use either the line or label parameters, but not both." << endl; return false; } - //make sure you have a valid random tree value - if ((randomtree != "0") && (randomtree != "1")) { cout << randomtree << " is not a valid randomtree value. Valid values for randomtree are 0, (meaning you have read your own trees) or 1 (meaning you want to random distribution of trees)." << endl; return false; } - if (commandName == "read.dist") { validateReadFiles(); validateReadDist(); @@ -174,9 +171,9 @@ bool ErrorCheck::checkInput(string input) { if (commandName == "parsimony") { //are you trying to use parsimony without reading a tree or saying you want random distribution - if (randomtree == "0") { + if (randomtree == "") { if (globaldata->gTree.size() == 0) { - cout << "You must read a treefile and a groupfile or set the randomtree parameter to 1, before you may execute the parsimony command." << endl; return false; } + cout << "You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command." << endl; return false; } } } diff --git a/fastamap.cpp b/fastamap.cpp index 0e6c22e..1406523 100644 --- a/fastamap.cpp +++ b/fastamap.cpp @@ -107,3 +107,22 @@ void FastaMap::print(ostream& out){ //prints data } } /*******************************************************************************/ +void FastaMap::printCondensedFasta(ostream& out){ //prints data + try { + // two column file created with groupname and them list of identical sequence names + for (it = data.begin(); it != data.end(); it++) { + out << ">" << it->second.groupname << endl; + out << it->first << endl; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ + diff --git a/fastamap.h b/fastamap.h index 864a2d0..8ba2fc6 100644 --- a/fastamap.h +++ b/fastamap.h @@ -38,6 +38,7 @@ public: void clear(); int size(); //returns number of unique sequences void print(ostream&); //produces a 2 column file with the groupname in the first column and the names in the second column. + void printCondensedFasta(ostream&); //produces a fasta file. void readFastaFile(ifstream&); private: diff --git a/globaldata.cpp b/globaldata.cpp index 9b740a6..eeb1f4d 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -67,7 +67,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ allLines = 1; commandName = commandString; //save command name to be used by other classes - //set all non filename paramters to default values + //set all non filename paramters to default reset(); //clears out data from previous read @@ -75,6 +75,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ clear(); gGroupmap = NULL; gTree.clear(); + labels.clear(); lines.clear(); groups.clear(); + } //saves help request @@ -107,7 +109,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "freq" ) { freq = value; } if (key == "method" ) { method = value; } if (key == "fileroot" ) { fileroot = value; } - if (key == "randomtree" ) { randomtree = value; } + if (key == "random" ) { randomtree = value; } if (key == "groups" ) { groups = value; } if (key == "calc") { calc = value; } @@ -153,7 +155,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "freq" ) { freq = value; } if (key == "method" ) { method = value; } if (key == "fileroot" ) { fileroot = value; } - if (key == "randomtree" ) { randomtree = value; } + if (key == "random" ) { randomtree = value; } if (key == "groups" ) { groups = value; } if (key == "calc") { calc = value; } @@ -296,7 +298,7 @@ void GlobalData::clear() { label = ""; groups = ""; jumble = "1"; //0 means don't jumble, 1 means jumble. - randomtree = "0"; //0 means user will enter some user trees, 1 means they just want the random tree distribution. + randomtree = ""; //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile. freq = "100"; method = "furthest"; fileroot = ""; @@ -313,7 +315,7 @@ void GlobalData::reset() { label = ""; groups = ""; jumble = "1"; //0 means don't jumble, 1 means jumble. - randomtree = "0"; //0 means user will enter some user trees, 1 means they just want the random tree distribution. + randomtree = ""; //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile. freq = "100"; method = "furthest"; calc = ""; diff --git a/helpcommand.cpp b/helpcommand.cpp index 0abab5a..e5247f9 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -34,12 +34,14 @@ int HelpCommand::execute(){ }else if (globaldata->helpRequest == "read.otu") { cout << "The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, " << "\n"; cout << "collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command " << "\n"; - cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, group and order." << "\n"; + cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, group, order, line and label." << "\n"; cout << "The read.otu command can be used in two ways. The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single." << "\n"; - cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile)." << "\n"; + cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels)." << "\n"; cout << "The list, rabund or sabund parameter is required, but you may only use one of them." << "\n"; + cout << "The line and label parameters are optional but you may not use both the line and label parameters at the same time." << "\n"; + cout << "The label and line parameters are used to read specific lines in your input." << "\n"; cout << "The second way to use the read.otu command is to read a list and a group so you can use the collect.shared, rarefaction.shared or summary.shared commands." << "\n"; - cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile). " << "\n"; + cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile, line=yourLines). " << "\n"; cout << "The list parameter and group paramaters are required. When using the command the second way read.otu command parses the .list file" << "\n"; cout << "and separates it into groups. It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. " << "\n"; cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; @@ -125,13 +127,13 @@ int HelpCommand::execute(){ cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; }else if (globaldata->helpRequest == "parsimony") { - cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the randomtree parameter." << "\n"; - cout << "The parsimony command parameters are randomtree and iters. No parameters are required." << "\n"; - cout << "The parsimony command should be in the following format: parsimony(randomtree=yourRandomTreeValue, iters=yourIters)." << "\n"; - cout << "Example parsimony(randomtree=1, iters=500)." << "\n"; - cout << "The default value for randomTree is 0 (meaning you want to use the trees in your inputfile, randomtree=1 means you just want the random distribution of trees)," << "\n"; + cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n"; + cout << "The parsimony command parameters are random and iters. No parameters are required." << "\n"; + cout << "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, iters=yourIters)." << "\n"; + cout << "Example parsimony(random=out, iters=500)." << "\n"; + cout << "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony)," << "\n"; cout << "and iters is 1000. The parsimony command output three files: .parsimony, .psummary and .pdistrib, their descriptions are in the manual." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; + cout << "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename)." << "\n" << "\n"; }else if (globaldata->helpRequest == "unifrac.weighted") { cout << "The unifrac.weighted command can only be executed after a successful read.tree command." << "\n"; cout << "The unifrac.weighted command parameters are groups and iters. No parameters are required." << "\n"; diff --git a/parselistcommand.cpp b/parselistcommand.cpp index 5379052..d9925ad 100644 --- a/parselistcommand.cpp +++ b/parselistcommand.cpp @@ -86,6 +86,7 @@ void ParseListCommand::parse(int index) { int ParseListCommand::execute(){ try{ globaldata = GlobalData::getInstance(); + int count = 1; //read in listfile read = new ReadPhilFile(globaldata->inputFileName); @@ -107,22 +108,30 @@ int ParseListCommand::execute(){ //parses and sets each groups listvector while(list != NULL){ label = list->getLabel(); - for(i=0; isize(); i++) { - parse(i); //parses data[i] list of sequence names - for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors - seq = it->second; - seq = seq.substr(1, seq.length()); //rips off extra comma - groupOfLists[it->first]->push_back(seq); //sets new listvector for each group + + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(label) == 1){ + + for(i=0; isize(); i++) { + parse(i); //parses data[i] list of sequence names + for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors + seq = it->second; + seq = seq.substr(1, seq.length()); //rips off extra comma + groupOfLists[it->first]->push_back(seq); //sets new listvector for each group + } + listGroups.clear(); } - listGroups.clear(); - } - //prints each new list file - for (i=0; igetNumGroups(); i++) { - groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label); - groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]])); - groupOfLists[groupMap->namesOfGroups[i]]->clear(); + //prints each new list file + for (i=0; igetNumGroups(); i++) { + groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label); + groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]])); + groupOfLists[groupMap->namesOfGroups[i]]->clear(); + } + + cout << label << '\t' << count << endl; } + list = input->getSharedListVector(); + count++; } //set groupmap for .shared commands diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 4632019..989ea12 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -15,10 +15,10 @@ ParsimonyCommand::ParsimonyCommand() { globaldata = GlobalData::getInstance(); //randomtree will tell us if user had their own treefile or if they just want the random distribution - convert(globaldata->getRandomTree(), randomtree); + randomtree = globaldata->getRandomTree(); //user has entered their own tree - if (randomtree == 0) { + if (randomtree == "") { T = globaldata->gTree; tmap = globaldata->gTreemap; parsFile = globaldata->getTreeFile() + ".parsimony"; @@ -29,8 +29,9 @@ ParsimonyCommand::ParsimonyCommand() { openOutputFile(distFile, outDist); }else { //user wants random distribution + savetmap = globaldata->gTreemap; getUserInput(); - parsFile = "rd_parsimony"; + parsFile = randomtree + ".rd_parsimony"; openOutputFile(parsFile, out); } @@ -59,7 +60,7 @@ int ParsimonyCommand::execute() { outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); outDist << "RandomTree#" << '\t' << "ParsScore" << endl; - if (randomtree == 0) { + if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { cout << "Processing tree " << i+1 << endl; @@ -133,7 +134,7 @@ int ParsimonyCommand::execute() { //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. for (it = validScores.begin(); it != validScores.end(); it++) { - if (randomtree == 0) { + if (randomtree == "") { it2 = uscoreFreq.find(it->first); //user data has that score if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second; } @@ -158,8 +159,11 @@ int ParsimonyCommand::execute() { printParsimonyFile(); printUSummaryFile(); - //reset randomTree parameter to 0 - globaldata->setRandomTree("0"); + //reset globaldata's treemap if you just did random distrib + if (randomtree != "") { globaldata->gTreemap = savetmap; } + + //reset randomTree parameter to "" + globaldata->setRandomTree(""); return 0; @@ -178,7 +182,7 @@ int ParsimonyCommand::execute() { void ParsimonyCommand::printParsimonyFile() { try { //column headers - if (randomtree == 0) { + if (randomtree == "") { out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; }else { out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; @@ -189,7 +193,7 @@ void ParsimonyCommand::printParsimonyFile() { //print each line for (it = validScores.begin(); it != validScores.end(); it++) { - if (randomtree == 0) { + if (randomtree == "") { out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; }else{ out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; diff --git a/parsimonycommand.h b/parsimonycommand.h index 5429e77..e854294 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -33,10 +33,11 @@ class ParsimonyCommand : public Command { GlobalData* globaldata; vector T; //user trees Tree* randT; //random tree - TreeMap* tmap; + TreeMap* tmap; + TreeMap* savetmap; Parsimony* pars; - string parsFile, sumFile, distFile; - int iters, randomtree, numGroups; + string parsFile, sumFile, distFile, randomtree; + int iters, numGroups; vector numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib. vector userTreeScores; //scores for users trees vector UScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or lower. diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 0cc1e22..a797d54 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -46,9 +46,14 @@ int SharedCommand::execute(){ shared = new Shared(); int i = 0; while(SharedList != NULL){ - shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector + + if(globaldata->allLines == 1 || globaldata->lines.count(i+1) == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){ + + shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector + printSharedData(); //prints info to the .shared file + } + SharedList = input->getSharedListVector(); //get new list vector to process - printSharedData(); //prints info to the .shared file i++; } return 0; diff --git a/validparameter.cpp b/validparameter.cpp index 1c6cd60..a82962f 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -40,7 +40,7 @@ ValidParameters::ValidParameters() { parameters["shared"] = "shared"; parameters["summary"] = "summary"; parameters["sharedsummary"] = "sharedsummary"; - parameters["randomtree"] = "randomtree"; + parameters["random"] = "random"; parameters["groups"] = "groups"; parameters["calc"] = "calc"; -- 2.39.2