From: westcott Date: Wed, 9 Dec 2009 19:56:36 +0000 (+0000) Subject: added sorted parameter to get.oturep, added error checking to chimera classes in... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=a5afca18544555fba2d9c3670ad1f8574916b0a0 added sorted parameter to get.oturep, added error checking to chimera classes in case unaligned sequences are used where aligned are expected and added probs parameter to classify.seqs to shut off bootstrapping with bayesian method --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 1c2e31e..efe0beb 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1368,6 +1368,7 @@ "-pedantic", "-wall", "-lreadline", + "-DUSE_READLINE", ); PREBINDING = NO; SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.5.sdk"; diff --git a/bayesian.cpp b/bayesian.cpp index 01e4139..5272b2d 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -11,8 +11,8 @@ #include "kmer.hpp" /**************************************************************************************************/ -Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff) : -Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff) { +Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, bool p) : +Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff), probs(p) { try { numKmers = database->getMaxKmer() + 1; @@ -99,7 +99,7 @@ Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), c /**************************************************************************************************/ string Bayesian::getTaxonomy(Sequence* seq) { try { - string tax; + string tax = ""; Kmer kmer(kmerSize); //get words contained in query @@ -116,8 +116,17 @@ string Bayesian::getTaxonomy(Sequence* seq) { int index = getMostProbableTaxonomy(queryKmers); //bootstrap - to set confidenceScore - int numToSelect = queryKmers.size() / 8; - tax = bootstrapResults(queryKmers, index, numToSelect); + if (probs) { + int numToSelect = queryKmers.size() / 8; + tax = bootstrapResults(queryKmers, index, numToSelect); + }else{ + TaxNode seqTax = phyloTree->get(index); + while (seqTax.level != 0) { //while you are not at the root + tax = seqTax.name + ";" + tax; + seqTax = phyloTree->get(seqTax.parent); + } + simpleTax = tax; + } return tax; } diff --git a/bayesian.h b/bayesian.h index 9b93c83..0020962 100644 --- a/bayesian.h +++ b/bayesian.h @@ -18,7 +18,7 @@ class Bayesian : public Classify { public: - Bayesian(string, string, string, int, int); + Bayesian(string, string, string, int, int, bool); ~Bayesian() {}; string getTaxonomy(Sequence*); @@ -36,6 +36,7 @@ private: vector genusNodes; //indexes in phyloTree where genus' are located int kmerSize, numKmers, confidenceThreshold; + bool probs; string bootstrapResults(vector, int, int); int getMostProbableTaxonomy(vector); diff --git a/bellerophon.cpp b/bellerophon.cpp index 9aa4f7a..a60a838 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -75,7 +75,7 @@ inline bool comparePref(Preference left, Preference right){ } //*************************************************************************************************************** -void Bellerophon::getChimeras() { +int Bellerophon::getChimeras() { try { //do soft filter @@ -94,6 +94,8 @@ void Bellerophon::getChimeras() { //read in sequences seqs = readSeqs(fastafile); + if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon method."); mothurOutEndLine(); return 1; } + int numSeqs = seqs.size(); if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); exit(1); } @@ -138,21 +140,22 @@ void Bellerophon::getChimeras() { vector left; vector right; for (int i = 0; i < seqs.size(); i++) { -//cout << "whole = " << seqs[i].getAligned() << endl; +//cout << "midpoint = " << midpoint << "\twindow = " << window << endl; +//cout << "whole = " << seqs[i]->getAligned().length() << endl; //save left side string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window); Sequence tempLeft; tempLeft.setName(seqs[i]->getName()); tempLeft.setAligned(seqLeft); left.push_back(tempLeft); -//cout << "left = " << tempLeft.getAligned() << endl; +//cout << "left = " << tempLeft.getAligned().length() << endl; //save right side string seqRight = seqs[i]->getAligned().substr(midpoint, window); Sequence tempRight; tempRight.setName(seqs[i]->getName()); tempRight.setAligned(seqRight); right.push_back(tempRight); -//cout << "right = " << seqRight << endl; +//cout << "right = " << seqRight.length() << endl; } //adjust midpoint by increment @@ -220,6 +223,8 @@ void Bellerophon::getChimeras() { //sort Preferences highest to lowest sort(pref.begin(), pref.end(), comparePref); + + return 0; } catch(exception& e) { diff --git a/bellerophon.h b/bellerophon.h index b309545..85c5550 100644 --- a/bellerophon.h +++ b/bellerophon.h @@ -28,7 +28,7 @@ class Bellerophon : public Chimera { Bellerophon(string); ~Bellerophon() {}; - void getChimeras(); + int getChimeras(); void print(ostream&); void setCons(string){}; diff --git a/ccode.cpp b/ccode.cpp index 0a9214e..a86e0fd 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -128,7 +128,7 @@ void Ccode::print(ostream& out) { } //*************************************************************************************************************** -void Ccode::getChimeras() { +int Ccode::getChimeras() { try { //read in query sequences and subject sequences @@ -139,6 +139,8 @@ void Ccode::getChimeras() { int numSeqs = querySeqs.size(); + if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon ccode."); mothurOutEndLine(); return 1; } + closest.resize(numSeqs); refCombo.resize(numSeqs, 0); @@ -297,7 +299,8 @@ void Ccode::getChimeras() { for (int i = 0; i < lines.size(); i++) { delete lines[i]; } delete distCalc; delete decalc; - + + return 0; } catch(exception& e) { errorOut(e, "Ccode", "getChimeras"); diff --git a/ccode.h b/ccode.h index de7c7c4..0ea2753 100644 --- a/ccode.h +++ b/ccode.h @@ -27,7 +27,7 @@ class Ccode : public Chimera { Ccode(string, string); ~Ccode(); - void getChimeras(); + int getChimeras(); void print(ostream&); void setCons(string c) {} diff --git a/chimera.cpp b/chimera.cpp index db153dd..4ae4991 100644 --- a/chimera.cpp +++ b/chimera.cpp @@ -90,12 +90,20 @@ vector Chimera::readSeqs(string file) { ifstream in; openInputFile(file, in); vector container; + int count = 0; + int length = 0; + unaligned = false; //read in seqs and store in vector while(!in.eof()){ Sequence* current = new Sequence(in); gobble(in); + if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length + else if (length != current->getAligned().length()) { //seqs are unaligned + unaligned = true; + } + if (current->getName() != "") { container.push_back(current); } } diff --git a/chimera.h b/chimera.h index 33e514b..339f2c6 100644 --- a/chimera.h +++ b/chimera.h @@ -89,12 +89,12 @@ class Chimera { //pure functions - virtual void getChimeras() = 0; + virtual int getChimeras() = 0; virtual void print(ostream&) = 0; protected: - bool filter, correction, svg; + bool filter, correction, svg, unaligned; int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters; float divR; string seqMask, quanfile, filterString, name; diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp index dcd2ef3..1278513 100644 --- a/chimeracheckrdp.cpp +++ b/chimeracheckrdp.cpp @@ -74,7 +74,7 @@ void ChimeraCheckRDP::print(ostream& out) { } //*************************************************************************************************************** -void ChimeraCheckRDP::getChimeras() { +int ChimeraCheckRDP::getChimeras() { try { //read in query sequences and subject sequences @@ -135,7 +135,7 @@ void ChimeraCheckRDP::getChimeras() { //free memory for (int i = 0; i < lines.size(); i++) { delete lines[i]; } - + return 0; } catch(exception& e) { errorOut(e, "ChimeraCheckRDP", "getChimeras"); diff --git a/chimeracheckrdp.h b/chimeracheckrdp.h index fe46206..e885af5 100644 --- a/chimeracheckrdp.h +++ b/chimeracheckrdp.h @@ -28,7 +28,7 @@ class ChimeraCheckRDP : public Chimera { ChimeraCheckRDP(string, string); ~ChimeraCheckRDP(); - void getChimeras(); + int getChimeras(); void print(ostream&); void setCons(string){}; diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp index bf0ffa6..3632e63 100644 --- a/chimeraseqscommand.cpp +++ b/chimeraseqscommand.cpp @@ -196,7 +196,7 @@ int ChimeraSeqsCommand::execute(){ else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile); } else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile); } else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile); } - else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); } + //else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); } else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; } //set user options @@ -227,7 +227,10 @@ int ChimeraSeqsCommand::execute(){ //find chimeras - chimera->getChimeras(); + int error = chimera->getChimeras(); + + //there was a problem + if (error == 1) { return 0; } string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras"; ofstream out; diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 0b7c221..9b85b5c 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -58,7 +58,7 @@ void ChimeraSlayer::print(ostream& out) { } //*************************************************************************************************************** -void ChimeraSlayer::getChimeras() { +int ChimeraSlayer::getChimeras() { try { //read in query sequences and subject sequences @@ -69,6 +69,8 @@ void ChimeraSlayer::getChimeras() { int numSeqs = querySeqs.size(); + if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1; } + chimeraResults.resize(numSeqs); chimeraFlags.resize(numSeqs, "no"); spotMap.resize(numSeqs); @@ -212,6 +214,8 @@ void ChimeraSlayer::getChimeras() { if (seqMask != "") { delete decalc; } + + return 0; } catch(exception& e) { errorOut(e, "ChimeraSlayer", "getChimeras"); diff --git a/chimeraslayer.h b/chimeraslayer.h index 379209e..5343ec2 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -26,7 +26,7 @@ class ChimeraSlayer : public Chimera { ChimeraSlayer(string, string); ~ChimeraSlayer(); - void getChimeras(); + int getChimeras(); void print(ostream&); void setCons(string){}; diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 2b76e50..eeef1cc 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -25,7 +25,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option){ else { //valid paramters for this command - string AlignArray[] = {"template","fasta","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff"}; + string AlignArray[] = {"template","fasta","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); @@ -94,6 +94,9 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option){ temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0"; } convert(temp, cutoff); + + temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; } + probs = isTrue(temp); if ((method == "bayesian") && (search != "kmer")) { @@ -123,7 +126,7 @@ ClassifySeqsCommand::~ClassifySeqsCommand(){ void ClassifySeqsCommand::help(){ try { mothurOut("The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n"); - mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend and numwanted.\n"); + mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n"); mothurOut("The template, fasta and taxonomy parameters are required.\n"); mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n"); mothurOut("The method parameter allows you to specify classification method to use. Your options are: bayesian and knn. The default is bayesian.\n"); @@ -135,6 +138,7 @@ void ClassifySeqsCommand::help(){ mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -2.0.\n"); mothurOut("The numwanted parameter allows you to specify the number of sequence matches you want with the knn method. The default is 10.\n"); mothurOut("The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy. The default is 0.\n"); + mothurOut("The probs parameter shut off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be run.\n"); mothurOut("The classify.seqs command should be in the following format: \n"); mothurOut("classify.seqs(template=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n"); mothurOut("Example classify.seqs(fasta=amazon.fasta, template=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n"); @@ -155,14 +159,12 @@ int ClassifySeqsCommand::execute(){ try { if (abort == true) { return 0; } - - - if(method == "bayesian") { classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff); } + if(method == "bayesian") { classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, probs); } else if(method == "knn") { classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted); } else { mothurOut(search + " is not a valid method option. I will run the command using bayesian."); mothurOutEndLine(); - classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff); + classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, probs); } int numFastaSeqs = 0; diff --git a/classifyseqscommand.h b/classifyseqscommand.h index ed2819b..993968b 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -47,7 +47,7 @@ private: string fastaFileName, templateFileName, distanceFileName, search, method, taxonomyFileName; int processors, kmerSize, numWanted, cutoff; float match, misMatch, gapOpen, gapExtend; - bool abort; + bool abort, probs; int driver(linePair*, string, string); void appendTaxFiles(string, string); diff --git a/commandfactory.cpp b/commandfactory.cpp index a385863..c33de86 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -61,10 +61,20 @@ #include "classifyseqscommand.h" #include "phylotypecommand.h" +/*******************************************************/ + +/******************************************************/ +CommandFactory* CommandFactory::getInstance() { + if( _uniqueInstance == 0) { + _uniqueInstance = new CommandFactory(); + } + return _uniqueInstance; +} /***********************************************************/ /***********************************************************/ CommandFactory::CommandFactory(){ + _uniqueInstance = 0; string s = ""; command = new NoCommand(s); diff --git a/commandfactory.hpp b/commandfactory.hpp index 55f70ea..70e2630 100644 --- a/commandfactory.hpp +++ b/commandfactory.hpp @@ -16,8 +16,7 @@ class Command; class CommandFactory { public: - CommandFactory(); - ~CommandFactory(); + static CommandFactory* getInstance(); Command* getCommand(string, string); Command* getCommand(); bool isValidCommand(string); @@ -27,8 +26,12 @@ private: Command* command; map commands; map::iterator it; - + static CommandFactory* _uniqueInstance; + CommandFactory( const CommandFactory& ); // Disable copy constructor + void operator=( const CommandFactory& ); // Disable assignment operator + CommandFactory(); + ~CommandFactory(); }; diff --git a/engine.cpp b/engine.cpp index 8112360..c6ade38 100644 --- a/engine.cpp +++ b/engine.cpp @@ -14,6 +14,26 @@ #include "engine.hpp" +/***********************************************************************/ +inline void terminateCommand(int dummy) { + + //mothurOut("Stopping command...."); + //CommandFactory* cFactory = CommandFactory::getInstance(); + //cFactory->getCommand(); //deletes old command and makes new no command. + //this may cause memory leak if old commands execute function allocated memory + //that is freed in the execute function and not the deconstructor + //mothurOut("DONE."); mothurOutEndLine(); +} +/***********************************************************************/ +Engine::Engine(){ + try { + cFactory = CommandFactory::getInstance(); + } + catch(exception& e) { + errorOut(e, "Engine", "Engine"); + exit(1); + } +} /***********************************************************************/ @@ -38,14 +58,11 @@ bool InteractEngine::getInput(){ int quitCommandCalled = 0; while(quitCommandCalled != 1){ - + mothurOutEndLine(); input = getCommand(); - mothurOutJustToLog("mothur > " + input); - mothurOutEndLine(); - //allow user to omit the () on the quit command if (input == "quit") { input = "quit()"; } @@ -75,12 +92,28 @@ bool InteractEngine::getInput(){ /***********************************************************************/ string Engine::getCommand() { try { - char* nextCommand = NULL; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #ifdef USE_READLINE + char* nextCommand = NULL; + nextCommand = readline("mothur > "); + if(nextCommand != NULL) { add_history(nextCommand); } + mothurOutJustToLog("mothur > " + toString(nextCommand)); + return nextCommand; + #else + string nextCommand = ""; + mothurOut("mothur > "); + getline(cin, nextCommand); + return nextCommand; + #endif + #else + string nextCommand = ""; + mothurOut("mothur > "); + getline(cin, nextCommand); + return nextCommand; + #endif - nextCommand = readline("mothur > "); - if(nextCommand != NULL) { add_history(nextCommand); } + mothurOutEndLine(); - return nextCommand; } catch(exception& e) { errorOut(e, "Engine", "getCommand"); @@ -88,17 +121,6 @@ string Engine::getCommand() { } } /***********************************************************************/ -void Engine::terminateCommand(int dummy) { - try { - mothurOut("Stopping command."); mothurOutEndLine(); - cFactory->getCommand(); //terminates command - } - catch(exception& e) { - errorOut(e, "Engine", "terminateCommand"); - exit(1); - } -} -/***********************************************************************/ //This function opens the batchfile to be used by BatchEngine::getInput. BatchEngine::BatchEngine(string path, string batchFileName){ try { diff --git a/engine.hpp b/engine.hpp index 3aabf13..df4e08d 100644 --- a/engine.hpp +++ b/engine.hpp @@ -22,12 +22,12 @@ class GlobalData; class Engine { public: - Engine() { cFactory = new CommandFactory(); } - virtual ~Engine(){ delete cFactory; } + Engine(); + virtual ~Engine(){} virtual bool getInput() = 0; virtual string getCommand(); vector getOptions() { return options; } - virtual void terminateCommand(int); + //virtual void terminateCommand(int); protected: vector options; CommandFactory* cFactory; diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index e9bed00..ed26cbe 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -9,6 +9,26 @@ #include "getoturepcommand.h" +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareName(repStruct left, repStruct right){ + return (left.name < right.name); +} +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareBin(repStruct left, repStruct right){ + return (left.bin < right.bin); +} +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareSize(repStruct left, repStruct right){ + return (left.size < right.size); +} +//******************************************************************************************************************** +//sorts lowest to highest +inline bool compareGroup(repStruct left, repStruct right){ + return (left.group < right.group); +} //********************************************************************************************************************** GetOTURepCommand::GetOTURepCommand(string option){ try{ @@ -22,7 +42,7 @@ GetOTURepCommand::GetOTURepCommand(string option){ help(); abort = true; } else { //valid paramters for this command - string Array[] = {"fasta","list","label","name", "group"}; + string Array[] = {"fasta","list","label","name", "group", "sorted"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -70,14 +90,25 @@ GetOTURepCommand::GetOTURepCommand(string option){ else if (namesfile == "not found") { namesfile = ""; } groupfile = validParameter.validFile(parameters, "group", true); - if (groupfile == "not open") { abort = true; } + if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { //read in group map info. groupMap = new GroupMap(groupfile); groupMap->readMap(); } - + + sorted = validParameter.validFile(parameters, "sorted", false); if (sorted == "not found"){ sorted = ""; } + if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) { + mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); mothurOutEndLine(); + sorted = ""; + } + + if ((sorted == "group") && (groupfile == "")) { + mothurOut("You must provide a groupfile to sort by group. I will not sort."); mothurOutEndLine(); + sorted = ""; + } + if (abort == false) { if(globaldata->gSparseMatrix != NULL) { @@ -125,11 +156,12 @@ GetOTURepCommand::GetOTURepCommand(string option){ void GetOTURepCommand::help(){ try { mothurOut("The get.oturep command can only be executed after a successful read.dist command.\n"); - mothurOut("The get.oturep command parameters are list, fasta, name, group and label. The fasta and list parameters are required.\n"); + mothurOut("The get.oturep command parameters are list, fasta, name, group, sorted and label. The fasta and list parameters are required.\n"); mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n"); mothurOut("The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n"); mothurOut("Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, name=amazon.names).\n"); mothurOut("The default value for label is all labels in your inputfile.\n"); + mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n"); mothurOut("The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin.\n"); mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n"); mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n"); @@ -319,7 +351,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i } //rip off last dash group = group.substr(0, group.length()-1); - } + }else{ group = ""; } // if only 1 sequence in bin or processing the "unique" label, then // the first sequence of the OTU is the representative one @@ -376,6 +408,7 @@ int GetOTURepCommand::process(ListVector* processList) { //create output file string outputFileName = getRootName(listfile) + processList->getLabel() + ".rep.fasta"; openOutputFile(outputFileName, out); + vector reps; //for each bin in the list vector for (int i = 0; i < processList->size(); i++) { @@ -387,19 +420,43 @@ int GetOTURepCommand::process(ListVector* processList) { sequence = fasta->getSequence(nameRep); if (sequence != "not found") { - nameRep = nameRep + "|" + toString(i+1); - nameRep = nameRep + "|" + toString(binsize); - if (groupfile != "") { - nameRep = nameRep + "|" + groups; + if (sorted == "") { //print them out + nameRep = nameRep + "|" + toString(i+1); + nameRep = nameRep + "|" + toString(binsize); + if (groupfile != "") { + nameRep = nameRep + "|" + groups; + } + out << ">" << nameRep << endl; + out << sequence << endl; + }else { //save them + repStruct newRep(nameRep, i+1, binsize, groups); + reps.push_back(newRep); } - out << ">" << nameRep << endl; - out << sequence << endl; - } else { + }else { mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine(); remove(outputFileName.c_str()); return 1; } } + + if (sorted != "") { //then sort them and print them + if (sorted == "name") { sort(reps.begin(), reps.end(), compareName); } + else if (sorted == "bin") { sort(reps.begin(), reps.end(), compareBin); } + else if (sorted == "size") { sort(reps.begin(), reps.end(), compareSize); } + else if (sorted == "group") { sort(reps.begin(), reps.end(), compareGroup); } + + //print them + for (int i = 0; i < reps.size(); i++) { + string sequence = fasta->getSequence(reps[i].name); + string outputName = reps[i].name + "|" + toString(reps[i].bin); + outputName = outputName + "|" + toString(reps[i].size); + if (groupfile != "") { + outputName = outputName + "|" + reps[i].group; + } + out << ">" << outputName << endl; + out << sequence << endl; + } + } out.close(); return 0; diff --git a/getoturepcommand.h b/getoturepcommand.h index a48cbf1..d92df1a 100644 --- a/getoturepcommand.h +++ b/getoturepcommand.h @@ -23,6 +23,17 @@ typedef list::iterator MatData; typedef map SeqMap; +struct repStruct { + string name; + int bin; + int size; + string group; + + repStruct(){} + repStruct(string n, int b, int s, string g) : name(n), bin(b), size(s), group(g) {} + ~repStruct() {} +}; + class GetOTURepCommand : public Command { public: @@ -39,7 +50,7 @@ private: InputData* input; FastaMap* fasta; GroupMap* groupMap; - string filename, fastafile, listfile, namesfile, groupfile, label; + string filename, fastafile, listfile, namesfile, groupfile, label, sorted; ofstream out; ifstream in, inNames; bool groupError; diff --git a/helpcommand.cpp b/helpcommand.cpp index fc2f291..e0ef2a3 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -16,7 +16,7 @@ HelpCommand::HelpCommand(string option){ if (option != "") { mothurOut("There are no valid parameters for the help() command."); mothurOutEndLine(); } - validCommands = new CommandFactory(); + validCommands = CommandFactory::getInstance(); } //********************************************************************************************************************** @@ -30,8 +30,6 @@ int HelpCommand::execute(){ validCommands->printCommands(cout); mothurOut("For more information about a specific command type 'commandName(help)' i.e. 'read.dist(help)'"); mothurOutEndLine(); - delete validCommands; - mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n"); return 0; } diff --git a/mothur.cpp b/mothur.cpp index 5941663..3bdbfcd 100644 --- a/mothur.cpp +++ b/mothur.cpp @@ -14,6 +14,7 @@ /**************************************************************************************************/ GlobalData* GlobalData::_uniqueInstance = 0; +CommandFactory* CommandFactory::_uniqueInstance = 0; int main(int argc, char *argv[]){ try { diff --git a/mothur.h b/mothur.h index 78ccf11..f839328 100644 --- a/mothur.h +++ b/mothur.h @@ -45,14 +45,20 @@ #include #include #include -#include -#include /***********************************************************************/ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) #include #include + + #ifdef USE_READLINE + #include + #include + #endif + + //#include + //#include #else #include //allows unbuffered screen capture from stdin #endif @@ -195,7 +201,6 @@ inline void gobble(istream& f){ f.putback(d); } - /***********************************************************************/ inline string getline(ifstream& fileHandle) { @@ -496,7 +501,6 @@ inline bool inVector(string member, vector group){ return false; } - /***********************************************************************/ //This function parses the estimator options and puts them in a vector diff --git a/nocommands.cpp b/nocommands.cpp index 5cf5a56..b519d77 100644 --- a/nocommands.cpp +++ b/nocommands.cpp @@ -23,9 +23,8 @@ int NoCommand::execute(){ //Could choose to give more help here?fdsah mothurOut("Invalid command.\n"); - CommandFactory* valid = new CommandFactory(); + CommandFactory* valid = CommandFactory::getInstance(); valid->printCommands(cout); - delete valid; return 0; } diff --git a/pintail.cpp b/pintail.cpp index e92e19f..2698fe4 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -74,7 +74,7 @@ void Pintail::print(ostream& out) { } //*************************************************************************************************************** -void Pintail::getChimeras() { +int Pintail::getChimeras() { try { //read in query sequences and subject sequences @@ -85,6 +85,8 @@ void Pintail::getChimeras() { int numSeqs = querySeqs.size(); + if (unaligned) { mothurOut("Your sequences need to be aligned when you use the pintail method."); mothurOutEndLine(); return 1; } + obsDistance.resize(numSeqs); expectedDistance.resize(numSeqs); seqCoef.resize(numSeqs); @@ -382,6 +384,8 @@ void Pintail::getChimeras() { delete distcalculator; delete decalc; + + return 0; } catch(exception& e) { errorOut(e, "Pintail", "getChimeras"); diff --git a/pintail.h b/pintail.h index aada728..516c682 100644 --- a/pintail.h +++ b/pintail.h @@ -27,7 +27,7 @@ class Pintail : public Chimera { Pintail(string, string); ~Pintail(); - void getChimeras(); + int getChimeras(); void print(ostream&); void setCons(string c) { consfile = c; }