A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; };
A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
- A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = "<group>"; };
- A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = "<group>"; };
+ A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = SOURCE_ROOT; };
+ A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = SOURCE_ROOT; };
A78780FE10A06B8B0086103D /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classify.h; sourceTree = SOURCE_ROOT; };
A787810210A06D5D0086103D /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classify.cpp; sourceTree = SOURCE_ROOT; };
- A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = "<group>"; };
- A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = "<group>"; };
- A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = "<group>"; };
- A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = "<group>"; };
- A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = "<group>"; };
- A78782AA10A1B1CB0086103D /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentdb.cpp; sourceTree = "<group>"; };
- A787844310A1EBDD0086103D /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = knn.h; sourceTree = "<group>"; };
- A787844410A1EBDD0086103D /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = knn.cpp; sourceTree = "<group>"; };
+ A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = SOURCE_ROOT; };
+ A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = SOURCE_ROOT; };
+ A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = SOURCE_ROOT; };
+ A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = SOURCE_ROOT; };
+ A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = SOURCE_ROOT; };
+ A78782AA10A1B1CB0086103D /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentdb.cpp; sourceTree = SOURCE_ROOT; };
+ A787844310A1EBDD0086103D /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = knn.h; sourceTree = SOURCE_ROOT; };
+ A787844410A1EBDD0086103D /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = knn.cpp; sourceTree = SOURCE_ROOT; };
A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayer.cpp; sourceTree = SOURCE_ROOT; };
A7B0450C106CEEC90046FC83 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slayer.h; sourceTree = SOURCE_ROOT; };
report.setCandidate(candidateSeq);
Sequence temp = templateDB->findClosestSequence(candidateSeq);
+
Sequence* templateSeq = &temp;
report.setTemplate(templateSeq);
/**************************************************************************************************/
-AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch){ // This assumes that the template database is in fasta format, may
+AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may
try { // need to alter this in the future?
longest = 0;
public:
- AlignmentDB(string, string, int, int, int, int, int); //reads fastafile passed in and stores sequences
+ AlignmentDB(string, string, int, float, float, float, float); //reads fastafile passed in and stores sequences
~AlignmentDB();
Sequence findClosestSequence(Sequence*);
/**************************************************************************************************/
Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff) :
-Classify(tfile, tempFile, method, ksize, 0, 0, 0, 0), kmerSize(ksize), confidenceThreshold(cutoff) {
+Classify(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0), kmerSize(ksize), confidenceThreshold(cutoff) {
try {
numKmers = database->getMaxKmer() + 1;
querySeqs = readSeqs(fastafile);
mothurOut("Done.");
//templateSeqs = readSeqs(templateFile);
- templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0,0,0,0);
+ templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
mothurOutEndLine();
int numSeqs = querySeqs.size();
/**************************************************************************************************/
-Classify::Classify(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : taxFile(tfile), templateFile(tempFile) {
+Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {
try {
readTaxonomy(taxFile);
*/
-/* This class is a parent to phylotyp, bayesian, knn. */
+/* This class is a parent to bayesian, knn. */
#include "mothur.h"
#include "database.hpp"
class Classify {
public:
- Classify(string, string, string, int, int, int, int, int);
- Classify(){ delete phyloTree; }
+ Classify(string, string, string, int, float, float, float, float);
- virtual ~Classify(){};
+ virtual ~Classify(){ delete phyloTree; delete database; };
virtual string getTaxonomy(Sequence*) = 0;
//virtual map<string, int> getConfidenceScores() { return taxConfidenceScore; }
//virtual vector<string> parseTax(string);
inFASTA.close();
lines.push_back(new linePair(0, numFastaSeqs));
-
+
driver(lines[0], newTaxonomyFile, tempTaxonomyFile);
-
}
else{
vector<int> positions;
}
#else
ifstream inFASTA;
- openInputFile(candidateFileName, inFASTA);
+ openInputFile(fastaFileName, inFASTA);
numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
inFASTA.close();
lines.push_back(new linePair(0, numFastaSeqs));
driver(lines[0], newTaxonomyFile, tempTaxonomyFile);
-#endif
-
+#endif
delete classify;
//make taxonomy tree from new taxonomy file
taxonomy = classify->getTaxonomy(candidateSeq);
if (taxonomy != "bad seq") {
- //if (method != "bayesian") {
- outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
- outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
- //}else{
- //vector<string> pTax = classify->parseTax(taxonomy);
- //map<string, int> confidence = classify->getConfidenceScores();
-
- //outTax << candidateSeq->getName() << '\t';
- //for (int j = 0; j < pTax.size(); j++) {
- //if (confidence[pTax[j]] > cutoff) {
- // outTax << pTax[j] << "(" << confidence[pTax[j]] << ");";
- //}else{ break; }
- //}
- //outTax << endl;
- //}
+ outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+ outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
}
delete candidateSeq;
- if(i % 100 == 0){
- mothurOut("Classifying sequence " + toString(i)); mothurOutEndLine();
+ if((i+1) % 100 == 0){
+ mothurOut("Classifying sequence " + toString(i+1)); mothurOutEndLine();
}
}
#include "alignment.hpp"
#include "classify.h"
+//KNN and Bayesian methods modeled from algorithms in
+//Naı¨ve Bayesian Classifier for Rapid Assignment of rRNA Sequences
+//into the New Bacterial Taxonomy†
+//Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1*
+//Center for Microbial Ecology1 and Department of Microbiology and Molecular Genetics,2 Michigan State University,
+//East Lansing, Michigan 48824
+//Received 10 January 2007/Accepted 18 June 2007
+
+
+
class ClassifySeqsCommand : public Command {
public:
/**************************************************************************************************/
PhyloTree::PhyloTree(){
-
- numNodes = 1;
- numSeqs = 0;
- tree.push_back(TaxNode("Root"));
- tree[0].heirarchyID = "0";
+ try {
+ numNodes = 1;
+ numSeqs = 0;
+ tree.push_back(TaxNode("Root"));
+ tree[0].heirarchyID = "0";
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "PhyloTree");
+ exit(1);
+ }
}
/**************************************************************************************************/
string PhyloTree::getNextTaxon(string& heirarchy){
-
- string currentLevel = "";
- if(heirarchy != ""){
- currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
- heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+ try {
+ string currentLevel = "";
+ if(heirarchy != ""){
+ currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
+ heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+ }
+ return currentLevel;
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "getNextTaxon");
+ exit(1);
}
- return currentLevel;
-
}
/**************************************************************************************************/
void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
-
- numSeqs++;
-
- map<string, int>::iterator childPointer;
-
- int currentNode = 0;
- int level = 1;
-
- tree[0].accessions.push_back(seqName);
- string taxon;// = getNextTaxon(seqTaxonomy);
-
- while(seqTaxonomy != ""){
-
- level++;
-
-//somehow the parent is getting one too many accnos
-//use print to reassign the taxa id
- taxon = getNextTaxon(seqTaxonomy);
-
- childPointer = tree[currentNode].children.find(taxon);
-
- if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
- currentNode = childPointer->second;
- tree[currentNode].accessions.push_back(seqName);
- name2Taxonomy[seqName] = currentNode;
- }
- else{ //otherwise, create it
- tree.push_back(TaxNode(taxon));
- numNodes++;
- tree[currentNode].children[taxon] = numNodes-1;
- tree[numNodes-1].parent = currentNode;
-
-// int numChildren = tree[currentNode].children.size();
-// string heirarchyID = tree[currentNode].heirarchyID;
-// tree[currentNode].accessions.push_back(seqName);
+ try {
+ numSeqs++;
+
+ map<string, int>::iterator childPointer;
+
+ int currentNode = 0;
+ int level = 1;
+
+ tree[0].accessions.push_back(seqName);
+ string taxon;// = getNextTaxon(seqTaxonomy);
+
+ while(seqTaxonomy != ""){
- currentNode = tree[currentNode].children[taxon];
- tree[currentNode].accessions.push_back(seqName);
- name2Taxonomy[seqName] = currentNode;
-// tree[currentNode].level = level;
-// tree[currentNode].childNumber = numChildren;
-// tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
- }
+ level++;
+
+ //somehow the parent is getting one too many accnos
+ //use print to reassign the taxa id
+ taxon = getNextTaxon(seqTaxonomy);
+
+ childPointer = tree[currentNode].children.find(taxon);
+
+ if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
+ currentNode = childPointer->second;
+ tree[currentNode].accessions.push_back(seqName);
+ name2Taxonomy[seqName] = currentNode;
+ }
+ else{ //otherwise, create it
+ tree.push_back(TaxNode(taxon));
+ numNodes++;
+ tree[currentNode].children[taxon] = numNodes-1;
+ tree[numNodes-1].parent = currentNode;
+
+ // int numChildren = tree[currentNode].children.size();
+ // string heirarchyID = tree[currentNode].heirarchyID;
+ // tree[currentNode].accessions.push_back(seqName);
+
+ currentNode = tree[currentNode].children[taxon];
+ tree[currentNode].accessions.push_back(seqName);
+ name2Taxonomy[seqName] = currentNode;
+ // tree[currentNode].level = level;
+ // tree[currentNode].childNumber = numChildren;
+ // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
+ }
- if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
+ if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
+ }
+
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "addSeqToTree");
+ exit(1);
}
}
/**************************************************************************************************/
/**************************************************************************************************/
void PhyloTree::assignHeirarchyIDs(int index){
-
- map<string,int>::iterator it;
- int counter = 1;
-
- for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
- tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
- counter++;
- tree[it->second].level = tree[index].level + 1;
- assignHeirarchyIDs(it->second);
+ try {
+ map<string,int>::iterator it;
+ int counter = 1;
+
+ for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+ tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
+ counter++;
+ tree[it->second].level = tree[index].level + 1;
+ assignHeirarchyIDs(it->second);
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "assignHeirarchyIDs");
+ exit(1);
}
}
/**************************************************************************************************/
void PhyloTree::print(ofstream& out){
-
- out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
- print(0, out);
-
-
+ try {
+ out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
+ print(0, out);
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "print");
+ exit(1);
+ }
}
/**************************************************************************************************/
void PhyloTree::print(int i, ofstream& out){
-
- map<string,int>::iterator it;
- for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
- out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
- print(it->second, out);
+ try {
+ map<string,int>::iterator it;
+ for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+ out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
+ print(it->second, out);
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "print");
+ exit(1);
}
-
}
/**************************************************************************************************/
public:
PhyloTree();
+ ~PhyloTree() {};
void addSeqToTree(string, string);
void assignHeirarchyIDs(int);
void print(ofstream&);
map<string,int>::iterator itB = globaldata->nameMap->find(secondName);
if(itA == globaldata->nameMap->end()){
- cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
+ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);
}
if(itB == globaldata->nameMap->end()){
- cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
+ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);
}
//using cutoff
#include "knn.h"
/**************************************************************************************************/
-Knn::Knn(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch, int n)
+Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n)
: Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch), num(n) {}
/**************************************************************************************************/
string Knn::getTaxonomy(Sequence* seq) {
string tax;
//use database to find closest seq
+
vector<int> closest = database->findClosestSequences(seq, num);
-
+
vector<string> closestNames;
for (int i = 0; i < closest.size(); i++) {
//find that sequences taxonomy in map
if (tax == "") { mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine(); tax = "bad seq"; }
}
+ simpleTax = tax;
return tax;
}
catch(exception& e) {
class Knn : public Classify {
public:
- Knn(string, string, string, int, int, int, int, int, int);
+ Knn(string, string, string, int, float, float, float, float, int);
~Knn() {};
string getTaxonomy(Sequence*);
//header
mothurOut("mothur v.1.6.0");
mothurOutEndLine();
- mothurOut("Last updated: 10/6/2009");
+ mothurOut("Last updated: 11/20/2009");
mothurOutEndLine();
mothurOutEndLine();
mothurOut("by");
try {
mothurOut("The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, \n");
mothurOut("collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command \n");
- mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order and label.\n");
+ mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order, label and groups.\n");
mothurOut("The read.otu command can be used in two ways. The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single.\n");
mothurOut("For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels).\n");
mothurOut("The list, rabund or sabund parameter is required, but you may only use one of them.\n");
mothurOut("In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile) or read.otu(shared=yourSharedFile). \n");
mothurOut("The list parameter and group paramaters or the shared paremeter is required. When using the command the second way with a list and group file read.otu command parses the .list file\n");
mothurOut("and separates it into groups. It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .rabund file for each group. \n");
+ mothurOut("You can use the groups parameter to choose only specific groups to be used in the .shared and .rabund files. \n");
mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
}