"-pedantic",
"-wall",
"-lreadline",
+ "-DUSE_READLINE",
);
PREBINDING = NO;
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.5.sdk";
#include "kmer.hpp"
/**************************************************************************************************/
-Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff) :
-Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff) {
+Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, bool p) :
+Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff), probs(p) {
try {
numKmers = database->getMaxKmer() + 1;
/**************************************************************************************************/
string Bayesian::getTaxonomy(Sequence* seq) {
try {
- string tax;
+ string tax = "";
Kmer kmer(kmerSize);
//get words contained in query
int index = getMostProbableTaxonomy(queryKmers);
//bootstrap - to set confidenceScore
- int numToSelect = queryKmers.size() / 8;
- tax = bootstrapResults(queryKmers, index, numToSelect);
+ if (probs) {
+ int numToSelect = queryKmers.size() / 8;
+ tax = bootstrapResults(queryKmers, index, numToSelect);
+ }else{
+ TaxNode seqTax = phyloTree->get(index);
+ while (seqTax.level != 0) { //while you are not at the root
+ tax = seqTax.name + ";" + tax;
+ seqTax = phyloTree->get(seqTax.parent);
+ }
+ simpleTax = tax;
+ }
return tax;
}
class Bayesian : public Classify {
public:
- Bayesian(string, string, string, int, int);
+ Bayesian(string, string, string, int, int, bool);
~Bayesian() {};
string getTaxonomy(Sequence*);
vector<int> genusNodes; //indexes in phyloTree where genus' are located
int kmerSize, numKmers, confidenceThreshold;
+ bool probs;
string bootstrapResults(vector<int>, int, int);
int getMostProbableTaxonomy(vector<int>);
}
//***************************************************************************************************************
-void Bellerophon::getChimeras() {
+int Bellerophon::getChimeras() {
try {
//do soft filter
//read in sequences
seqs = readSeqs(fastafile);
+ if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon method."); mothurOutEndLine(); return 1; }
+
int numSeqs = seqs.size();
if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); exit(1); }
vector<Sequence> left; vector<Sequence> right;
for (int i = 0; i < seqs.size(); i++) {
-//cout << "whole = " << seqs[i].getAligned() << endl;
+//cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
+//cout << "whole = " << seqs[i]->getAligned().length() << endl;
//save left side
string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
Sequence tempLeft;
tempLeft.setName(seqs[i]->getName());
tempLeft.setAligned(seqLeft);
left.push_back(tempLeft);
-//cout << "left = " << tempLeft.getAligned() << endl;
+//cout << "left = " << tempLeft.getAligned().length() << endl;
//save right side
string seqRight = seqs[i]->getAligned().substr(midpoint, window);
Sequence tempRight;
tempRight.setName(seqs[i]->getName());
tempRight.setAligned(seqRight);
right.push_back(tempRight);
-//cout << "right = " << seqRight << endl;
+//cout << "right = " << seqRight.length() << endl;
}
//adjust midpoint by increment
//sort Preferences highest to lowest
sort(pref.begin(), pref.end(), comparePref);
+
+ return 0;
}
catch(exception& e) {
Bellerophon(string);
~Bellerophon() {};
- void getChimeras();
+ int getChimeras();
void print(ostream&);
void setCons(string){};
}
//***************************************************************************************************************
-void Ccode::getChimeras() {
+int Ccode::getChimeras() {
try {
//read in query sequences and subject sequences
int numSeqs = querySeqs.size();
+ if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon ccode."); mothurOutEndLine(); return 1; }
+
closest.resize(numSeqs);
refCombo.resize(numSeqs, 0);
for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
delete distCalc;
delete decalc;
-
+
+ return 0;
}
catch(exception& e) {
errorOut(e, "Ccode", "getChimeras");
Ccode(string, string);
~Ccode();
- void getChimeras();
+ int getChimeras();
void print(ostream&);
void setCons(string c) {}
ifstream in;
openInputFile(file, in);
vector<Sequence*> container;
+ int count = 0;
+ int length = 0;
+ unaligned = false;
//read in seqs and store in vector
while(!in.eof()){
Sequence* current = new Sequence(in); gobble(in);
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { //seqs are unaligned
+ unaligned = true;
+ }
+
if (current->getName() != "") { container.push_back(current); }
}
//pure functions
- virtual void getChimeras() = 0;
+ virtual int getChimeras() = 0;
virtual void print(ostream&) = 0;
protected:
- bool filter, correction, svg;
+ bool filter, correction, svg, unaligned;
int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters;
float divR;
string seqMask, quanfile, filterString, name;
}
//***************************************************************************************************************
-void ChimeraCheckRDP::getChimeras() {
+int ChimeraCheckRDP::getChimeras() {
try {
//read in query sequences and subject sequences
//free memory
for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
-
+ return 0;
}
catch(exception& e) {
errorOut(e, "ChimeraCheckRDP", "getChimeras");
ChimeraCheckRDP(string, string);
~ChimeraCheckRDP();
- void getChimeras();
+ int getChimeras();
void print(ostream&);
void setCons(string){};
else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile); }
else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile); }
else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile); }
- else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); }
+ //else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); }
else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
//set user options
//find chimeras
- chimera->getChimeras();
+ int error = chimera->getChimeras();
+
+ //there was a problem
+ if (error == 1) { return 0; }
string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras";
ofstream out;
}
//***************************************************************************************************************
-void ChimeraSlayer::getChimeras() {
+int ChimeraSlayer::getChimeras() {
try {
//read in query sequences and subject sequences
int numSeqs = querySeqs.size();
+ if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1; }
+
chimeraResults.resize(numSeqs);
chimeraFlags.resize(numSeqs, "no");
spotMap.resize(numSeqs);
if (seqMask != "") {
delete decalc;
}
+
+ return 0;
}
catch(exception& e) {
errorOut(e, "ChimeraSlayer", "getChimeras");
ChimeraSlayer(string, string);
~ChimeraSlayer();
- void getChimeras();
+ int getChimeras();
void print(ostream&);
void setCons(string){};
else {
//valid paramters for this command
- string AlignArray[] = {"template","fasta","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff"};
+ string AlignArray[] = {"template","fasta","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
OptionParser parser(option);
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0"; }
convert(temp, cutoff);
+
+ temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; }
+ probs = isTrue(temp);
if ((method == "bayesian") && (search != "kmer")) {
void ClassifySeqsCommand::help(){
try {
mothurOut("The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n");
- mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend and numwanted.\n");
+ mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n");
mothurOut("The template, fasta and taxonomy parameters are required.\n");
mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n");
mothurOut("The method parameter allows you to specify classification method to use. Your options are: bayesian and knn. The default is bayesian.\n");
mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -2.0.\n");
mothurOut("The numwanted parameter allows you to specify the number of sequence matches you want with the knn method. The default is 10.\n");
mothurOut("The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy. The default is 0.\n");
+ mothurOut("The probs parameter shut off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be run.\n");
mothurOut("The classify.seqs command should be in the following format: \n");
mothurOut("classify.seqs(template=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n");
mothurOut("Example classify.seqs(fasta=amazon.fasta, template=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n");
try {
if (abort == true) { return 0; }
-
-
- if(method == "bayesian") { classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff); }
+ if(method == "bayesian") { classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, probs); }
else if(method == "knn") { classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted); }
else {
mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
mothurOutEndLine();
- classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff);
+ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, probs);
}
int numFastaSeqs = 0;
string fastaFileName, templateFileName, distanceFileName, search, method, taxonomyFileName;
int processors, kmerSize, numWanted, cutoff;
float match, misMatch, gapOpen, gapExtend;
- bool abort;
+ bool abort, probs;
int driver(linePair*, string, string);
void appendTaxFiles(string, string);
#include "classifyseqscommand.h"
#include "phylotypecommand.h"
+/*******************************************************/
+
+/******************************************************/
+CommandFactory* CommandFactory::getInstance() {
+ if( _uniqueInstance == 0) {
+ _uniqueInstance = new CommandFactory();
+ }
+ return _uniqueInstance;
+}
/***********************************************************/
/***********************************************************/
CommandFactory::CommandFactory(){
+ _uniqueInstance = 0;
string s = "";
command = new NoCommand(s);
class CommandFactory {
public:
- CommandFactory();
- ~CommandFactory();
+ static CommandFactory* getInstance();
Command* getCommand(string, string);
Command* getCommand();
bool isValidCommand(string);
Command* command;
map<string, string> commands;
map<string, string>::iterator it;
-
+ static CommandFactory* _uniqueInstance;
+ CommandFactory( const CommandFactory& ); // Disable copy constructor
+ void operator=( const CommandFactory& ); // Disable assignment operator
+ CommandFactory();
+ ~CommandFactory();
};
#include "engine.hpp"
+/***********************************************************************/
+inline void terminateCommand(int dummy) {
+
+ //mothurOut("Stopping command....");
+ //CommandFactory* cFactory = CommandFactory::getInstance();
+ //cFactory->getCommand(); //deletes old command and makes new no command.
+ //this may cause memory leak if old commands execute function allocated memory
+ //that is freed in the execute function and not the deconstructor
+ //mothurOut("DONE."); mothurOutEndLine();
+}
+/***********************************************************************/
+Engine::Engine(){
+ try {
+ cFactory = CommandFactory::getInstance();
+ }
+ catch(exception& e) {
+ errorOut(e, "Engine", "Engine");
+ exit(1);
+ }
+}
/***********************************************************************/
int quitCommandCalled = 0;
while(quitCommandCalled != 1){
-
+
mothurOutEndLine();
input = getCommand();
- mothurOutJustToLog("mothur > " + input);
- mothurOutEndLine();
-
//allow user to omit the () on the quit command
if (input == "quit") { input = "quit()"; }
/***********************************************************************/
string Engine::getCommand() {
try {
- char* nextCommand = NULL;
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #ifdef USE_READLINE
+ char* nextCommand = NULL;
+ nextCommand = readline("mothur > ");
+ if(nextCommand != NULL) { add_history(nextCommand); }
+ mothurOutJustToLog("mothur > " + toString(nextCommand));
+ return nextCommand;
+ #else
+ string nextCommand = "";
+ mothurOut("mothur > ");
+ getline(cin, nextCommand);
+ return nextCommand;
+ #endif
+ #else
+ string nextCommand = "";
+ mothurOut("mothur > ");
+ getline(cin, nextCommand);
+ return nextCommand;
+ #endif
- nextCommand = readline("mothur > ");
- if(nextCommand != NULL) { add_history(nextCommand); }
+ mothurOutEndLine();
- return nextCommand;
}
catch(exception& e) {
errorOut(e, "Engine", "getCommand");
}
}
/***********************************************************************/
-void Engine::terminateCommand(int dummy) {
- try {
- mothurOut("Stopping command."); mothurOutEndLine();
- cFactory->getCommand(); //terminates command
- }
- catch(exception& e) {
- errorOut(e, "Engine", "terminateCommand");
- exit(1);
- }
-}
-/***********************************************************************/
//This function opens the batchfile to be used by BatchEngine::getInput.
BatchEngine::BatchEngine(string path, string batchFileName){
try {
class Engine {
public:
- Engine() { cFactory = new CommandFactory(); }
- virtual ~Engine(){ delete cFactory; }
+ Engine();
+ virtual ~Engine(){}
virtual bool getInput() = 0;
virtual string getCommand();
vector<string> getOptions() { return options; }
- virtual void terminateCommand(int);
+ //virtual void terminateCommand(int);
protected:
vector<string> options;
CommandFactory* cFactory;
#include "getoturepcommand.h"
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareName(repStruct left, repStruct right){
+ return (left.name < right.name);
+}
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareBin(repStruct left, repStruct right){
+ return (left.bin < right.bin);
+}
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareSize(repStruct left, repStruct right){
+ return (left.size < right.size);
+}
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareGroup(repStruct left, repStruct right){
+ return (left.group < right.group);
+}
//**********************************************************************************************************************
GetOTURepCommand::GetOTURepCommand(string option){
try{
help(); abort = true;
} else {
//valid paramters for this command
- string Array[] = {"fasta","list","label","name", "group"};
+ string Array[] = {"fasta","list","label","name", "group", "sorted"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
else if (namesfile == "not found") { namesfile = ""; }
groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { abort = true; }
+ if (groupfile == "not open") { groupfile = ""; abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else {
//read in group map info.
groupMap = new GroupMap(groupfile);
groupMap->readMap();
}
-
+
+ sorted = validParameter.validFile(parameters, "sorted", false); if (sorted == "not found"){ sorted = ""; }
+ if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
+ mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); mothurOutEndLine();
+ sorted = "";
+ }
+
+ if ((sorted == "group") && (groupfile == "")) {
+ mothurOut("You must provide a groupfile to sort by group. I will not sort."); mothurOutEndLine();
+ sorted = "";
+ }
+
if (abort == false) {
if(globaldata->gSparseMatrix != NULL) {
void GetOTURepCommand::help(){
try {
mothurOut("The get.oturep command can only be executed after a successful read.dist command.\n");
- mothurOut("The get.oturep command parameters are list, fasta, name, group and label. The fasta and list parameters are required.\n");
+ mothurOut("The get.oturep command parameters are list, fasta, name, group, sorted and label. The fasta and list parameters are required.\n");
mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
mothurOut("The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
mothurOut("Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, name=amazon.names).\n");
mothurOut("The default value for label is all labels in your inputfile.\n");
+ mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
mothurOut("The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin.\n");
mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
}
//rip off last dash
group = group.substr(0, group.length()-1);
- }
+ }else{ group = ""; }
// if only 1 sequence in bin or processing the "unique" label, then
// the first sequence of the OTU is the representative one
//create output file
string outputFileName = getRootName(listfile) + processList->getLabel() + ".rep.fasta";
openOutputFile(outputFileName, out);
+ vector<repStruct> reps;
//for each bin in the list vector
for (int i = 0; i < processList->size(); i++) {
sequence = fasta->getSequence(nameRep);
if (sequence != "not found") {
- nameRep = nameRep + "|" + toString(i+1);
- nameRep = nameRep + "|" + toString(binsize);
- if (groupfile != "") {
- nameRep = nameRep + "|" + groups;
+ if (sorted == "") { //print them out
+ nameRep = nameRep + "|" + toString(i+1);
+ nameRep = nameRep + "|" + toString(binsize);
+ if (groupfile != "") {
+ nameRep = nameRep + "|" + groups;
+ }
+ out << ">" << nameRep << endl;
+ out << sequence << endl;
+ }else { //save them
+ repStruct newRep(nameRep, i+1, binsize, groups);
+ reps.push_back(newRep);
}
- out << ">" << nameRep << endl;
- out << sequence << endl;
- } else {
+ }else {
mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine();
remove(outputFileName.c_str());
return 1;
}
}
+
+ if (sorted != "") { //then sort them and print them
+ if (sorted == "name") { sort(reps.begin(), reps.end(), compareName); }
+ else if (sorted == "bin") { sort(reps.begin(), reps.end(), compareBin); }
+ else if (sorted == "size") { sort(reps.begin(), reps.end(), compareSize); }
+ else if (sorted == "group") { sort(reps.begin(), reps.end(), compareGroup); }
+
+ //print them
+ for (int i = 0; i < reps.size(); i++) {
+ string sequence = fasta->getSequence(reps[i].name);
+ string outputName = reps[i].name + "|" + toString(reps[i].bin);
+ outputName = outputName + "|" + toString(reps[i].size);
+ if (groupfile != "") {
+ outputName = outputName + "|" + reps[i].group;
+ }
+ out << ">" << outputName << endl;
+ out << sequence << endl;
+ }
+ }
out.close();
return 0;
typedef list<PCell>::iterator MatData;
typedef map<int, float> SeqMap;
+struct repStruct {
+ string name;
+ int bin;
+ int size;
+ string group;
+
+ repStruct(){}
+ repStruct(string n, int b, int s, string g) : name(n), bin(b), size(s), group(g) {}
+ ~repStruct() {}
+};
+
class GetOTURepCommand : public Command {
public:
InputData* input;
FastaMap* fasta;
GroupMap* groupMap;
- string filename, fastafile, listfile, namesfile, groupfile, label;
+ string filename, fastafile, listfile, namesfile, groupfile, label, sorted;
ofstream out;
ifstream in, inNames;
bool groupError;
if (option != "") { mothurOut("There are no valid parameters for the help() command."); mothurOutEndLine(); }
- validCommands = new CommandFactory();
+ validCommands = CommandFactory::getInstance();
}
//**********************************************************************************************************************
validCommands->printCommands(cout);
mothurOut("For more information about a specific command type 'commandName(help)' i.e. 'read.dist(help)'"); mothurOutEndLine();
- delete validCommands;
-
mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n");
return 0;
}
/**************************************************************************************************/
GlobalData* GlobalData::_uniqueInstance = 0;
+CommandFactory* CommandFactory::_uniqueInstance = 0;
int main(int argc, char *argv[]){
try {
#include <cerrno>
#include <ctime>
#include <limits>
-#include <readline/readline.h>
-#include <readline/history.h>
/***********************************************************************/
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
#include <sys/wait.h>
#include <unistd.h>
+
+ #ifdef USE_READLINE
+ #include <readline/readline.h>
+ #include <readline/history.h>
+ #endif
+
+ //#include <readline/readline.h>
+ //#include <readline/history.h>
#else
#include <conio.h> //allows unbuffered screen capture from stdin
#endif
f.putback(d);
}
-
/***********************************************************************/
inline string getline(ifstream& fileHandle) {
return false;
}
-
/***********************************************************************/
//This function parses the estimator options and puts them in a vector
//Could choose to give more help here?fdsah
mothurOut("Invalid command.\n");
- CommandFactory* valid = new CommandFactory();
+ CommandFactory* valid = CommandFactory::getInstance();
valid->printCommands(cout);
- delete valid;
return 0;
}
}
//***************************************************************************************************************
-void Pintail::getChimeras() {
+int Pintail::getChimeras() {
try {
//read in query sequences and subject sequences
int numSeqs = querySeqs.size();
+ if (unaligned) { mothurOut("Your sequences need to be aligned when you use the pintail method."); mothurOutEndLine(); return 1; }
+
obsDistance.resize(numSeqs);
expectedDistance.resize(numSeqs);
seqCoef.resize(numSeqs);
delete distcalculator;
delete decalc;
+
+ return 0;
}
catch(exception& e) {
errorOut(e, "Pintail", "getChimeras");
Pintail(string, string);
~Pintail();
- void getChimeras();
+ int getChimeras();
void print(ostream&);
void setCons(string c) { consfile = c; }