A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A751ACE91098B283003D0911 /* readcluster.cpp */; };
A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
+ A773805F10B6D6FC002A6A61 /* taxonomyequalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */; };
+ A773809010B6EFD1002A6A61 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */; };
A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */; };
A787810310A06D5D0086103D /* classify.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787810210A06D5D0086103D /* classify.cpp */; };
- A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* doTaxonomy.cpp */; };
+ A78781BA10A0813E0086103D /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* phylotree.cpp */; };
A787821110A0AAE70086103D /* bayesian.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787821010A0AAE70086103D /* bayesian.cpp */; };
A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78782AA10A1B1CB0086103D /* alignmentdb.cpp */; };
A787844510A1EBDD0086103D /* knn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787844410A1EBDD0086103D /* knn.cpp */; };
A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; };
A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
+ A773805D10B6D6FC002A6A61 /* taxonomyequalizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomyequalizer.h; sourceTree = SOURCE_ROOT; };
+ A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomyequalizer.cpp; sourceTree = SOURCE_ROOT; };
+ A773808E10B6EFD1002A6A61 /* phylotypecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotypecommand.h; sourceTree = SOURCE_ROOT; };
+ A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotypecommand.cpp; sourceTree = SOURCE_ROOT; };
A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = SOURCE_ROOT; };
A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = SOURCE_ROOT; };
A78780FE10A06B8B0086103D /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classify.h; sourceTree = SOURCE_ROOT; };
A787810210A06D5D0086103D /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classify.cpp; sourceTree = SOURCE_ROOT; };
- A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = SOURCE_ROOT; };
- A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = SOURCE_ROOT; };
+ A78781B810A0813D0086103D /* phylotree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotree.cpp; sourceTree = SOURCE_ROOT; };
+ A78781B910A0813D0086103D /* phylotree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotree.h; sourceTree = SOURCE_ROOT; };
A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = SOURCE_ROOT; };
A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = SOURCE_ROOT; };
A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = SOURCE_ROOT; };
375873F60F7D649C0040F377 /* nocommands.cpp */,
3792946E0F2E191800B9034A /* parsimonycommand.h */,
3792946F0F2E191800B9034A /* parsimonycommand.cpp */,
+ A773808E10B6EFD1002A6A61 /* phylotypecommand.h */,
+ A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */,
37D927FE0F21331F001D4494 /* quitcommand.h */,
37D927FD0F21331F001D4494 /* quitcommand.cpp */,
37D928080F21331F001D4494 /* rarefactcommand.h */,
A787810210A06D5D0086103D /* classify.cpp */,
A787820F10A0AAE70086103D /* bayesian.h */,
A787821010A0AAE70086103D /* bayesian.cpp */,
- A78781B910A0813D0086103D /* doTaxonomy.h */,
- A78781B810A0813D0086103D /* doTaxonomy.cpp */,
+ A773805D10B6D6FC002A6A61 /* taxonomyequalizer.h */,
+ A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */,
+ A78781B910A0813D0086103D /* phylotree.h */,
+ A78781B810A0813D0086103D /* phylotree.cpp */,
A787844310A1EBDD0086103D /* knn.h */,
A787844410A1EBDD0086103D /* knn.cpp */,
);
A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */,
A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */,
A787810310A06D5D0086103D /* classify.cpp in Sources */,
- A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */,
+ A78781BA10A0813E0086103D /* phylotree.cpp in Sources */,
A787821110A0AAE70086103D /* bayesian.cpp in Sources */,
A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */,
A787844510A1EBDD0086103D /* knn.cpp in Sources */,
+ A773805F10B6D6FC002A6A61 /* taxonomyequalizer.cpp in Sources */,
+ A773809010B6EFD1002A6A61 /* phylotypecommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "mothur.h"
#include "database.hpp"
-#include "doTaxonomy.h"
+#include "phylotree.h"
class Sequence;
#include "classifyseqscommand.h"
#include "sequence.hpp"
#include "bayesian.h"
-#include "doTaxonomy.h"
+#include "phylotree.h"
#include "knn.h"
//**********************************************************************************************************************
ClassifySeqsCommand::ClassifySeqsCommand(string option){
try {
- // globaldata = GlobalData::getInstance();
abort = false;
//allow user to run help
#include "getlistcountcommand.h"
#include "hclustercommand.h"
#include "classifyseqscommand.h"
+#include "phylotypecommand.h"
/***********************************************************/
commands["quit"] = "quit";
commands["hcluster"] = "hcluster";
commands["classify.seqs"] = "classify.seqs";
+ commands["phylotype"] = "phylotype";
}
/***********************************************************/
else if(commandName == "get.listcount") { command = new GetListCountCommand(optionString); }
else if(commandName == "hcluster") { command = new HClusterCommand(optionString); }
else if(commandName == "classify.seqs") { command = new ClassifySeqsCommand(optionString); }
+ else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
+++ /dev/null
-/*
- * doTaxonomy.cpp
- *
- *
- * Created by Pat Schloss on 6/17/09.
- * Copyright 2009 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "doTaxonomy.h"
-
-/**************************************************************************************************/
-
-PhyloTree::PhyloTree(){
- try {
- numNodes = 1;
- numSeqs = 0;
- tree.push_back(TaxNode("Root"));
- tree[0].heirarchyID = "0";
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "PhyloTree");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-string PhyloTree::getNextTaxon(string& heirarchy){
- try {
- string currentLevel = "";
- if(heirarchy != ""){
- currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
- heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
- }
- return currentLevel;
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "getNextTaxon");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
- try {
- numSeqs++;
-
- map<string, int>::iterator childPointer;
-
- int currentNode = 0;
- int level = 1;
-
- tree[0].accessions.push_back(seqName);
- string taxon;// = getNextTaxon(seqTaxonomy);
-
- while(seqTaxonomy != ""){
-
- level++;
-
- //somehow the parent is getting one too many accnos
- //use print to reassign the taxa id
- taxon = getNextTaxon(seqTaxonomy);
-
- childPointer = tree[currentNode].children.find(taxon);
-
- if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
- currentNode = childPointer->second;
- tree[currentNode].accessions.push_back(seqName);
- name2Taxonomy[seqName] = currentNode;
- }
- else{ //otherwise, create it
- tree.push_back(TaxNode(taxon));
- numNodes++;
- tree[currentNode].children[taxon] = numNodes-1;
- tree[numNodes-1].parent = currentNode;
-
- // int numChildren = tree[currentNode].children.size();
- // string heirarchyID = tree[currentNode].heirarchyID;
- // tree[currentNode].accessions.push_back(seqName);
-
- currentNode = tree[currentNode].children[taxon];
- tree[currentNode].accessions.push_back(seqName);
- name2Taxonomy[seqName] = currentNode;
- // tree[currentNode].level = level;
- // tree[currentNode].childNumber = numChildren;
- // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
- }
-
- if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
- }
-
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "addSeqToTree");
- exit(1);
- }
-}
-/**************************************************************************************************/
-vector<int> PhyloTree::getGenusNodes() {
- try {
- genusIndex.clear();
- //generate genusIndexes
- map<int, int>::iterator it2;
- for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
-
- return genusIndex;
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "getGenusNodes");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
-void PhyloTree::assignHeirarchyIDs(int index){
- try {
- map<string,int>::iterator it;
- int counter = 1;
-
- for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
- tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
- counter++;
- tree[it->second].level = tree[index].level + 1;
- assignHeirarchyIDs(it->second);
- }
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "assignHeirarchyIDs");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-void PhyloTree::print(ofstream& out){
- try {
- out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
- print(0, out);
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "print");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-void PhyloTree::print(int i, ofstream& out){
- try {
- map<string,int>::iterator it;
- for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
- out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
- print(it->second, out);
- }
- }
- catch(exception& e) {
- errorOut(e, "PhyloTree", "print");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-
-
+++ /dev/null
-#ifndef DOTAXONOMY_H
-#define DOTAXONOMY_H
-
-/*
- * doTaxonomy.h
- *
- *
- * Created by Pat Schloss on 6/17/09.
- * Copyright 2009 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "mothur.h"
-
-/**************************************************************************************************/
-
-struct TaxNode {
- vector<string> accessions; //names of seqs in this branch of tree
- map<string, int> children; //childs name to index in tree
- int parent, childNumber, level;
- string name, heirarchyID;
-
- TaxNode(string n) : name(n), level(0), parent(-1) { }
- TaxNode(){}
-};
-
-/**************************************************************************************************/
-
-class PhyloTree {
-
-public:
- PhyloTree();
- ~PhyloTree() {};
- void addSeqToTree(string, string);
- void assignHeirarchyIDs(int);
- void print(ofstream&);
- vector<int> getGenusNodes();
- TaxNode get(int i) { return tree[i]; }
- TaxNode get(string seqName) { return tree[name2Taxonomy[seqName]]; }
- int getIndex(string seqName) { return name2Taxonomy[seqName]; }
- string getName(int i) { return tree[i].name; }
-private:
- string getNextTaxon(string&);
- vector<TaxNode> tree;
- vector<int> genusIndex; //holds the indexes in tree where the genus level taxonomies are stored
- map<string, int> name2Taxonomy; //maps name to index in tree
- map<int, int> uniqueTaxonomies; //map of unique taxonomies
- void print(int, ofstream&);
- int numNodes;
- int numSeqs;
-};
-
-/**************************************************************************************************/
-
-#endif
-
-
--- /dev/null
+/*
+ * doTaxonomy.cpp
+ *
+ *
+ * Created by Pat Schloss on 6/17/09.
+ * Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "phylotree.h"
+
+/**************************************************************************************************/
+
+PhyloTree::PhyloTree(){
+ try {
+ numNodes = 1;
+ numSeqs = 0;
+ tree.push_back(TaxNode("Root"));
+ tree[0].heirarchyID = "0";
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "PhyloTree");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+PhyloTree::PhyloTree(string tfile){
+ try {
+ numNodes = 1;
+ numSeqs = 0;
+ tree.push_back(TaxNode("Root"));
+ tree[0].heirarchyID = "0";
+
+ ifstream in;
+ openInputFile(tfile, in);
+
+ //read in users taxonomy file and add sequences to tree
+ string name, tax;
+ while(!in.eof()){
+ in >> name >> tax; gobble(in);
+
+ addSeqToTree(name, tax);
+ }
+ in.close();
+
+ assignHeirarchyIDs(0);
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "PhyloTree");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+string PhyloTree::getNextTaxon(string& heirarchy){
+ try {
+ string currentLevel = "";
+ if(heirarchy != ""){
+ currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
+ heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+ }
+ return currentLevel;
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "getNextTaxon");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
+ try {
+ numSeqs++;
+
+ map<string, int>::iterator childPointer;
+
+ int currentNode = 0;
+ int level = 1;
+
+ tree[0].accessions.push_back(seqName);
+ string taxon;// = getNextTaxon(seqTaxonomy);
+
+ while(seqTaxonomy != ""){
+
+ level++;
+
+ //somehow the parent is getting one too many accnos
+ //use print to reassign the taxa id
+ taxon = getNextTaxon(seqTaxonomy);
+
+ childPointer = tree[currentNode].children.find(taxon);
+
+ if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
+ currentNode = childPointer->second;
+ tree[currentNode].accessions.push_back(seqName);
+ name2Taxonomy[seqName] = currentNode;
+ }
+ else{ //otherwise, create it
+ tree.push_back(TaxNode(taxon));
+ numNodes++;
+ tree[currentNode].children[taxon] = numNodes-1;
+ tree[numNodes-1].parent = currentNode;
+
+ // int numChildren = tree[currentNode].children.size();
+ // string heirarchyID = tree[currentNode].heirarchyID;
+ // tree[currentNode].accessions.push_back(seqName);
+
+ currentNode = tree[currentNode].children[taxon];
+ tree[currentNode].accessions.push_back(seqName);
+ name2Taxonomy[seqName] = currentNode;
+ // tree[currentNode].level = level;
+ // tree[currentNode].childNumber = numChildren;
+ // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
+ }
+
+ if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
+ }
+
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "addSeqToTree");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector<int> PhyloTree::getGenusNodes() {
+ try {
+ genusIndex.clear();
+ //generate genusIndexes
+ map<int, int>::iterator it2;
+ for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
+
+ return genusIndex;
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "getGenusNodes");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+void PhyloTree::assignHeirarchyIDs(int index){
+ try {
+ map<string,int>::iterator it;
+ int counter = 1;
+
+ for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+ tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
+ counter++;
+ tree[it->second].level = tree[index].level + 1;
+ assignHeirarchyIDs(it->second);
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "assignHeirarchyIDs");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::print(ofstream& out){
+ try {
+ out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
+ print(0, out);
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "print");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::print(int i, ofstream& out){
+ try {
+ map<string,int>::iterator it;
+ for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+ out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
+ print(it->second, out);
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "PhyloTree", "print");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+
+
--- /dev/null
+#ifndef DOTAXONOMY_H
+#define DOTAXONOMY_H
+
+/*
+ * phylotree.h
+ *
+ *
+ * Created by Pat Schloss on 6/17/09.
+ * Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+struct TaxNode {
+ vector<string> accessions; //names of seqs in this branch of tree
+ map<string, int> children; //childs name to index in tree
+ int parent, childNumber, level;
+ string name, heirarchyID;
+
+ TaxNode(string n) : name(n), level(0), parent(-1) { }
+ TaxNode(){}
+};
+
+/**************************************************************************************************/
+
+class PhyloTree {
+
+public:
+ PhyloTree();
+ PhyloTree(string); //pass it a taxonomy file and it makes the tree
+ ~PhyloTree() {};
+ void addSeqToTree(string, string);
+ void assignHeirarchyIDs(int);
+ void print(ofstream&);
+ vector<int> getGenusNodes();
+ TaxNode get(int i) { return tree[i]; }
+ TaxNode get(string seqName) { return tree[name2Taxonomy[seqName]]; }
+ int getIndex(string seqName) { return name2Taxonomy[seqName]; }
+ string getName(int i) { return tree[i].name; }
+private:
+ string getNextTaxon(string&);
+ vector<TaxNode> tree;
+ vector<int> genusIndex; //holds the indexes in tree where the genus level taxonomies are stored
+ map<string, int> name2Taxonomy; //maps name to index in tree
+ map<int, int> uniqueTaxonomies; //map of unique taxonomies
+ void print(int, ofstream&);
+ int numNodes;
+ int numSeqs;
+};
+
+/**************************************************************************************************/
+
+#endif
+
+
--- /dev/null
+/*
+ * phylotypecommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 11/20/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "phylotypecommand.h"
+#include "phylotree.h"
+#include "listvector.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+
+/**********************************************************************************************************************/
+PhylotypeCommand::PhylotypeCommand(string option){
+ try {
+ abort = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+
+ //valid paramters for this command
+ string AlignArray[] = {"taxonomy","cutoff","label"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string, string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
+ if (taxonomyFileName == "not found") {
+ mothurOut("taxonomy is a required parameter for the phylotype command.");
+ mothurOutEndLine();
+ abort = true;
+ }else if (taxonomyFileName == "not open") { abort = true; }
+
+ string temp = validParameter.validFile(parameters, "cutoff", false);
+ if (temp == "not found") { temp = "-1"; }
+ convert(temp, cutoff);
+
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; allLines = 1; }
+ else {
+ if(label != "all") { splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
+
+void PhylotypeCommand::help(){
+ try {
+ mothurOut("The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n");
+ mothurOut("The phylotype command parameter options are taxonomy, cutoff and label. The taxonomy parameter is required.\n");
+ mothurOut("The cutoff parameter allows you to specify the level you want to stop at. The default is the highest level in your taxonomy file. \n");
+ mothurOut("The label parameter allows you to specify which level you would like, and are separated by dashes. The default all levels in your taxonomy file. \n");
+ mothurOut("The phylotype command should be in the following format: \n");
+ mothurOut("phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n");
+ mothurOut("Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n\n");
+ }
+ catch(exception& e) {
+ errorOut(e, "PhylotypeCommand", "help");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
+
+PhylotypeCommand::~PhylotypeCommand(){}
+
+/**********************************************************************************************************************/
+
+int PhylotypeCommand::execute(){
+ try {
+
+ if (abort == true) { return 0; }
+
+ //reads in taxonomy file and makes all the taxonomies the same length
+ //by appending the last taxon to a given taxonomy as many times as needed to
+ //make it as long as the longest taxonomy in the file
+ TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff);
+
+ string equalizedTaxFile = taxEqual->getEqualizedTaxFile();
+
+ delete taxEqual;
+
+ //build taxonomy tree from equalized file
+ PhyloTree* tree = new PhyloTree(equalizedTaxFile);
+ vector<int> leaves = tree->getGenusNodes();
+
+ //store leaf nodes in current map
+ for (int i = 0; i < leaves.size(); i++) { currentNodes[leaves[i]] = leaves[i]; }
+
+ bool done = false;
+ if (tree->get(leaves[0]).parent == -1) { mothurOut("Empty Tree"); mothurOutEndLine(); done = true; }
+
+ ofstream outList;
+ string outputListFile = getRootName(taxonomyFileName) + "tax.list";
+ openOutputFile(outputListFile, outList);
+ ofstream outSabund;
+ string outputSabundFile = getRootName(taxonomyFileName) + "tax.sabund";
+ openOutputFile(outputSabundFile, outSabund);
+ ofstream outRabund;
+ string outputRabundFile = getRootName(taxonomyFileName) + "tax.rabund";
+ openOutputFile(outputRabundFile, outRabund);
+
+
+ //start at leaves of tree and work towards root, processing the labels the user wants
+ while((!done) && ((allLines == 1) || (labels.size() != 0))) {
+
+ string level = toString(tree->get(currentNodes.begin()->first).level);
+
+ //is this a level the user want output for
+ if(allLines == 1 || labels.count(level) == 1){
+
+ //output level
+ mothurOut(level); mothurOutEndLine();
+
+ ListVector list;
+ list.setLabel(level);
+ //go through nodes and build listvector
+ for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
+
+ //get parents
+ TaxNode node = tree->get(itCurrent->first);
+ parentNodes[node.parent] = node.parent;
+
+ vector<string> names = node.accessions;
+
+ //make the names compatable with listvector
+ string name = "";
+ for (int i = 0; i < names.size(); i++) { name += names[i] + ","; }
+ name = name.substr(0, name.length()-1); //rip off extra ','
+
+ //add bin to list vector
+ list.push_back(name);
+ }
+
+ //print listvector
+ list.print(outList);
+ //print rabund
+ list.getRAbundVector().print(outRabund);
+ //print sabund
+ list.getSAbundVector().print(outSabund);
+
+ labels.erase(level);
+
+ }else {
+
+ //just get parents
+ for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
+ int parent = tree->get(itCurrent->first).parent;
+ parentNodes[parent] = parent;
+ }
+ }
+
+ //move up a level
+ currentNodes = parentNodes;
+ parentNodes.clear();
+
+ //have we reached the rootnode
+ if (tree->get(currentNodes.begin()->first).parent == -1) { done = true; }
+ }
+
+ outList.close();
+ outSabund.close();
+ outRabund.close();
+
+ delete tree;
+
+ return 0;
+ }
+
+ catch(exception& e) {
+ errorOut(e, "PhylotypeCommand", "execute");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
--- /dev/null
+#ifndef PHYLOTYPECOMMAND_H
+#define PHYLOTYPECOMMAND_H
+
+
+/*
+ * phylotypecommand.h
+ * Mothur
+ *
+ * Created by westcott on 11/20/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "taxonomyequalizer.h"
+#include "command.hpp"
+
+/*************************************************************************/
+
+class PhylotypeCommand : public Command {
+
+public:
+ PhylotypeCommand(string);
+ ~PhylotypeCommand();
+ int execute();
+ void help();
+
+private:
+ bool abort, allLines;
+ string taxonomyFileName, label;
+ set<string> labels; //holds labels to be used
+ int cutoff;
+
+ map<int, int> currentNodes;
+ map<int, int> parentNodes;
+ map<int, int>::iterator itCurrent;
+
+};
+
+
+/*************************************************************************/
+
+
+#endif
+
+
+
--- /dev/null
+/*
+ * taxonomyequalizer.cpp
+ * Mothur
+ *
+ * Created by westcott on 11/20/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "taxonomyequalizer.h"
+
+/**************************************************************************************************/
+TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) {
+ try {
+ containsConfidence = false;
+
+ ifstream inTax;
+ openInputFile(tfile, inTax);
+
+ int highestLevel = getHighestLevel(inTax);
+
+ //if the user has specified a cutoff and it's smaller than the highest level
+ if ((cutoff != -1) && (cutoff < highestLevel)) {
+ highestLevel = cutoff;
+ }else if (cutoff > highestLevel) {
+ mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
+ mothurOutEndLine();
+ }
+
+ inTax.close();
+ openInputFile(tfile, inTax);
+
+ ofstream out;
+ equalizedFile = getRootName(tfile) + "equalized.taxonomy";
+ openOutputFile(equalizedFile, out);
+
+ string name, tax;
+ while (inTax) {
+ inTax >> name >> tax; gobble(inTax);
+
+ if (containsConfidence) { removeConfidences(tax); }
+
+ //is this a taxonomy that needs to be extended?
+ if (seqLevels[name] < highestLevel) {
+ extendTaxonomy(name, tax, highestLevel);
+ }else if (seqLevels[name] > highestLevel) { //this can happen if hte user enters a cutoff
+ truncateTaxonomy(name, tax, highestLevel);
+ }
+
+ out << name << '\t' << tax << endl;
+ }
+
+ inTax.close();
+ out.close();
+
+ }
+ catch(exception& e) {
+ errorOut(e, "TaxEqualizer", "TaxEqualizer");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int TaxEqualizer::getHighestLevel(ifstream& in) {
+ try {
+
+ int level = 0;
+ string name, tax;
+
+ while (in) {
+ in >> name >> tax; gobble(in);
+
+ //count levels in this taxonomy
+ int thisLevel = 0;
+ for (int i = 0; i < tax.length(); i++) {
+ if (tax[i] == ';') { thisLevel++; }
+ }
+
+ //save sequences level
+ seqLevels[name] = thisLevel;
+
+ //is this the longest taxonomy?
+ if (thisLevel > level) {
+ level = thisLevel;
+ testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
+ }
+ }
+
+ int pos = testTax.find_first_of('(');
+
+ //if there are '(' then there are confidences we need to take out
+ if (pos != -1) { containsConfidence = true; }
+
+ return level;
+
+ }
+ catch(exception& e) {
+ errorOut(e, "TaxEqualizer", "getHighestLevel");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
+ try {
+
+ //get last taxon
+ tax = tax.substr(0, tax.length()-1); //take off final ";"
+ int pos = tax.find_last_of(';');
+ string lastTaxon = tax.substr(pos+1);
+ lastTaxon += ";"; //add back on delimiting char
+ tax += ";";
+
+ int currentLevel = seqLevels[name];
+
+ //added last taxon until you get desired level
+ for (int i = currentLevel; i < desiredLevel; i++) {
+ tax += lastTaxon;
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "TaxEqualizer", "extendTaxonomy");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
+ try {
+
+ int currentLevel = seqLevels[name];
+ tax = tax.substr(0, tax.length()-1); //take off final ";"
+
+ //remove a taxon until you get to desired level
+ for (int i = currentLevel; i > desiredLevel; i--) {
+ tax = tax.substr(0, tax.find_last_of(';'));
+ }
+
+ tax += ";";
+ }
+ catch(exception& e) {
+ errorOut(e, "TaxEqualizer", "truncateTaxonomy");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void TaxEqualizer::removeConfidences(string& tax) {
+ try {
+
+ string taxon;
+ string newTax = "";
+
+ while (tax.find_first_of(';') != -1) {
+ //get taxon
+ taxon = tax.substr(0,tax.find_first_of(';'));
+ taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
+ taxon += ";";
+
+ tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+ newTax += taxon;
+ }
+
+ tax = newTax;
+ }
+ catch(exception& e) {
+ errorOut(e, "TaxEqualizer", "removeConfidences");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
--- /dev/null
+#ifndef TAXONOMYEQUALIZER_H
+#define TAXONOMYEQUALIZER_H
+
+
+/*
+ * taxonomyequalizer.h
+ * Mothur
+ *
+ * Created by westcott on 11/20/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+//reads in taxonomy file and makes all the taxonomies the same length
+//by appending the last taxon to a given taxonomy as many times as needed to
+//make it as long as the longest taxonomy in the file
+
+/**************************************************************************************************/
+
+class TaxEqualizer {
+
+public:
+ TaxEqualizer(string, int);
+ ~TaxEqualizer() {};
+
+ string getEqualizedTaxFile() { return equalizedFile; }
+
+
+private:
+ string equalizedFile, testTax;
+ bool containsConfidence;
+ int cutoff;
+ map<string, int> seqLevels; //maps name to level of taxonomy
+
+ int getHighestLevel(ifstream&); //scans taxonomy file to find taxonomy with highest level
+ void extendTaxonomy(string, string&, int); //name, taxonomy, desired level
+ void truncateTaxonomy(string, string&, int); //name, taxonomy, desired level
+ void removeConfidences(string&); //removes the confidence limits on the taxon
+
+
+};
+
+/**************************************************************************************************/
+
+
+#endif
+
+