X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyseqscommand.h;h=f075b41495afc9002e2a695d47972d4d947eafc9;hb=8f7f4fc08b8c70d9ef0f79607813dba4e926e102;hp=a77b7d1d7193b862c88e036553a7616fb3f33dc9;hpb=63e089e0b3aad1741bab60119ed7ccc784dce347;p=mothur.git diff --git a/classifyseqscommand.h b/classifyseqscommand.h index a77b7d1..f075b41 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -12,36 +12,69 @@ #include "mothur.h" #include "command.hpp" -#include "alignment.hpp" #include "classify.h" +//KNN and Bayesian methods modeled from algorithms in +//Naïve Bayesian Classifier for Rapid Assignment of rRNA Sequences +//into the New Bacterial Taxonomy† +//Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1* +//Center for Microbial Ecology1 and Department of Microbiology and Molecular Genetics,2 Michigan State University, +//East Lansing, Michigan 48824 +//Received 10 January 2007/Accepted 18 June 2007 + + + class ClassifySeqsCommand : public Command { public: - ClassifySeqsCommand(string); + ClassifySeqsCommand(string); + ClassifySeqsCommand(); ~ClassifySeqsCommand(); + + vector setParameters(); + string getCommandName() { return "classify.seqs"; } + string getCommandCategory() { return "Phylotype Analysis"; } + string getHelpString(); + string getCitation() { return "Wang Q, Garrity GM, Tiedje JM, Cole JR (2007). Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Appl Environ Microbiol 73: 5261-7. [ for Bayesian classifier ] \nAltschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997). Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25: 3389-402. [ for BLAST ] \nDeSantis TZ, Hugenholtz P, Larsen N, Rojas M, Brodie EL, Keller K, Huber T, Dalevi D, Hu P, Andersen GL (2006). Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB. Appl Environ Microbiol 72: 5069-72. 
[ for kmer ] \nhttp://www.mothur.org/wiki/Classify.seqs"; } + + int execute(); - void help(); + void help() { m->mothurOut(getHelpString()); } + + private: struct linePair { - int start; - int numSeqs; - linePair(int i, int j) : start(i), numSeqs(j) {} + unsigned long int start; + unsigned long int end; + linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {} }; + vector processIDS; //processid vector lines; + vector fastaFileNames; + vector namefileNames; + vector groupfileNames; + vector outputNames; + map > nameMap; + map >::iterator itNames; Classify* classify; - string fastaFileName, templateFileName, distanceFileName, search, method, taxonomyFileName; - int processors, kmerSize, numWanted; + string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile; + int processors, kmerSize, numWanted, cutoff, iters; float match, misMatch, gapOpen, gapExtend; - bool abort; + bool abort, probs; - int driver(linePair*, string); + int driver(linePair*, string, string, string); void appendTaxFiles(string, string); - void createProcesses(string); + int createProcesses(string, string, string); + string addUnclassifieds(string, int); + + int MPIReadNamesFile(string); + #ifdef USE_MPI + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + #endif }; #endif