1 #ifndef CLASSIFYSEQSCOMMAND_H
2 #define CLASSIFYSEQSCOMMAND_H
5 * classifyseqscommand.h
8 * Created by westcott on 11/2/09.
9 * Copyright 2009 Schloss Lab. All rights reserved.
14 #include "command.hpp"
17 //KNN and Bayesian methods modeled from algorithms in
18 //Naïve Bayesian Classifier for Rapid Assignment of rRNA Sequences
19 //into the New Bacterial Taxonomy†
20 //Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1*
21 //Center for Microbial Ecology1 and Department of Microbiology and Molecular Genetics,2 Michigan State University,
22 //East Lansing, Michigan 48824
23 //Received 10 January 2007/Accepted 18 June 2007
27 class ClassifySeqsCommand : public Command {
// Constructors and destructor. Only declared here; the definitions are not in this header excerpt.
// NOTE(review): the string argument is presumably the user-supplied option string -- confirm against the definition.
30 ClassifySeqsCommand(string);
31 ClassifySeqsCommand();
32 ~ClassifySeqsCommand();
// Declared only; returns a list of strings.
// NOTE(review): presumably the names of the parameters this command accepts -- TODO confirm.
34 vector<string> setParameters();
// Returns this command's name, the fixed string "classify.seqs".
35 string getCommandName() { return "classify.seqs"; }
// Returns this command's category label, the fixed string "Phylotype Analysis".
36 string getCommandCategory() { return "Phylotype Analysis"; }
// Declared only (defined elsewhere). Returns the help text that help() passes to m->mothurOut.
37 string getHelpString();
// Returns the citation text for this command as a single string: three literature
// references separated by newlines, each followed by a bracketed tag naming what it is
// cited for ("Bayesian classifier", "BLAST", "kmer"), and finally the command's wiki URL.
38 string getCitation() { return "Wang Q, Garrity GM, Tiedje JM, Cole JR (2007). Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Appl Environ Microbiol 73: 5261-7. [ for Bayesian classifier ] \nAltschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997). Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25: 3389-402. [ for BLAST ] \nDeSantis TZ, Hugenholtz P, Larsen N, Rojas M, Brodie EL, Keller K, Huber T, Dalevi D, Hu P, Andersen GL (2006). Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB. Appl Environ Microbiol 72: 5069-72. [ for kmer ] \nhttp://www.mothur.org/wiki/Classify.seqs"; }
// Writes the text returned by getHelpString() through m->mothurOut.
// NOTE(review): `m` is not declared in this excerpt; presumably a member inherited from Command -- confirm.
42 void help() { m->mothurOut(getHelpString()); }
// A pair of unsigned long values, start and end.
// NOTE(review): the enclosing type's header line is not visible in this excerpt; the
// constructor name below implies these are members of `linePair`.
// NOTE(review): presumably start/end delimit the slice of an input file handled by one
// worker -- TODO confirm whether they are byte offsets or counts.
48 unsigned long int start;
49 unsigned long int end;
// Stores i in start and j in end.
50 linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
// Data members. The notes below are inferred from names and types only; nothing in this
// excerpt shows how they are populated -- treat each as an assumption to confirm.
53 vector<int> processIDS; //processid
// Owning-vs-borrowed status of these linePair pointers is not visible here -- TODO confirm who deletes them.
54 vector<linePair*> lines;
// Presumably the lists of input fasta / name / group files and of the output files produced.
55 vector<string> fastaFileNames;
56 vector<string> namefileNames;
57 vector<string> groupfileNames;
58 vector<string> outputNames;
// Maps a string key to a list of strings; itNames is an iterator over the same map type.
59 map<string, vector<string> > nameMap;
60 map<string, vector<string> >::iterator itNames;
// String-valued settings (file names plus `search` and `method`).
64 string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
// Integer-valued settings.
65 int processors, kmerSize, numWanted, cutoff, iters;
// Float-valued settings; presumably alignment scoring values (match/mismatch reward, gap penalties) -- TODO confirm.
66 float match, misMatch, gapOpen, gapExtend;
// Helper declarations; all definitions live outside this header excerpt, so the meaning
// of each argument and return value below is not established here.
// NOTE(review): driver presumably processes the range described by the linePair -- confirm what the three strings name.
69 int driver(linePair*, string, string, string);
70 void appendTaxFiles(string, string);
// NOTE(review): takes the same three strings as driver; presumably fans driver out across `processors` -- confirm.
71 int createProcesses(string, string, string);
72 string addUnclassifieds(string, int);
74 int MPIReadNamesFile(string);
// NOTE(review): MPI_File must be declared wherever this line is compiled; no MPI include or
// preprocessor guard is visible in this excerpt -- confirm the declaration is guarded in the full file.
76 int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);