vector<string> setParameters();
string getCommandName() { return "classify.seqs"; }
string getCommandCategory() { return "Phylotype Analysis"; }
vector<string> setParameters();
string getCommandName() { return "classify.seqs"; }
string getCommandCategory() { return "Phylotype Analysis"; }
string getHelpString();
string getCitation() { return "Wang Q, Garrity GM, Tiedje JM, Cole JR (2007). Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Appl Environ Microbiol 73: 5261-7. [ for Bayesian classifier ] \nAltschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997). Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25: 3389-402. [ for BLAST ] \nDeSantis TZ, Hugenholtz P, Larsen N, Rojas M, Brodie EL, Keller K, Huber T, Dalevi D, Hu P, Andersen GL (2006). Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB. Appl Environ Microbiol 72: 5069-72. [ for kmer ] \nhttp://www.mothur.org/wiki/Classify.seqs"; }
string getDescription() { return "classify sequences"; }
string getHelpString();
string getCitation() { return "Wang Q, Garrity GM, Tiedje JM, Cole JR (2007). Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Appl Environ Microbiol 73: 5261-7. [ for Bayesian classifier ] \nAltschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997). Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25: 3389-402. [ for BLAST ] \nDeSantis TZ, Hugenholtz P, Larsen N, Rojas M, Brodie EL, Keller K, Huber T, Dalevi D, Hu P, Andersen GL (2006). Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB. Appl Environ Microbiol 72: 5069-72. [ for kmer ] \nhttp://www.mothur.org/wiki/Classify.seqs"; }
string getDescription() { return "classify sequences"; }
string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
int processors, kmerSize, numWanted, cutoff, iters;
float match, misMatch, gapOpen, gapExtend;
string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
int processors, kmerSize, numWanted, cutoff, iters;
float match, misMatch, gapOpen, gapExtend;
- int driver(linePair*, string, string, string);
- void appendTaxFiles(string, string);
- int createProcesses(string, string, string);
+ int driver(linePair*, string, string, string, string);
+ int createProcesses(string, string, string, string);
string addUnclassifieds(string, int);
int MPIReadNamesFile(string);
#ifdef USE_MPI
string addUnclassifieds(string, int);
int MPIReadNamesFile(string);
#ifdef USE_MPI
- int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
+ int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
- string search, taxonomyFileName, templateFileName, method;
+ string search, taxonomyFileName, templateFileName, method, accnos;
unsigned long long start;
unsigned long long end;
MothurOut* m;
float match, misMatch, gapOpen, gapExtend;
int count, kmerSize, threadID, cutoff, iters, numWanted;
unsigned long long start;
unsigned long long end;
MothurOut* m;
float match, misMatch, gapOpen, gapExtend;
int count, kmerSize, threadID, cutoff, iters, numWanted;
- classifyData(bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid) {
+ classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) {
+ accnos = acc;
- if(pDataArray->method == "bayesian"){ myclassify = new Bayesian("saved", "saved", pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID); }
- else if(pDataArray->method == "knn"){ myclassify = new Knn("saved", "saved", pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); }
+ if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); }
+ else if(pDataArray->method == "knn"){ myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); }
else {
pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
pDataArray->m->mothurOutEndLine();
else {
pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
pDataArray->m->mothurOutEndLine();
- myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID);
+ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);
- if (taxonomy != "bad seq") {
- //output confidence scores or not
- if (pDataArray->probs) {
- outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
- }else{
- outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
- }
-
- outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+ if (taxonomy == "unknown;") { pDataArray->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); pDataArray->m->mothurOutEndLine(); }
+
+ //output confidence scores or not
+ if (pDataArray->probs) {
+ outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+ }else{
+ outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;