]> git.donarmstrong.com Git - mothur.git/commitdiff
added phylotype command which reads a taxonomy file and creates a .list, .rabund...
authorwestcott <westcott>
Mon, 23 Nov 2009 17:27:06 +0000 (17:27 +0000)
committerwestcott <westcott>
Mon, 23 Nov 2009 17:27:06 +0000 (17:27 +0000)
12 files changed:
Mothur.xcodeproj/project.pbxproj
classify.h
classifyseqscommand.cpp
commandfactory.cpp
doTaxonomy.cpp [deleted file]
doTaxonomy.h [deleted file]
phylotree.cpp [new file with mode: 0644]
phylotree.h [new file with mode: 0644]
phylotypecommand.cpp [new file with mode: 0644]
phylotypecommand.h [new file with mode: 0644]
taxonomyequalizer.cpp [new file with mode: 0644]
taxonomyequalizer.h [new file with mode: 0644]

index 977aee98049b1ee387a34f6ce2cbfaf3efc43990..5dc0ea90e1cc5d932d128d2ec885505ffb5d47b7 100644 (file)
                A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
                A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A751ACE91098B283003D0911 /* readcluster.cpp */; };
                A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
+               A773805F10B6D6FC002A6A61 /* taxonomyequalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */; };
+               A773809010B6EFD1002A6A61 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */; };
                A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */; };
                A787810310A06D5D0086103D /* classify.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787810210A06D5D0086103D /* classify.cpp */; };
-               A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* doTaxonomy.cpp */; };
+               A78781BA10A0813E0086103D /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* phylotree.cpp */; };
                A787821110A0AAE70086103D /* bayesian.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787821010A0AAE70086103D /* bayesian.cpp */; };
                A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78782AA10A1B1CB0086103D /* alignmentdb.cpp */; };
                A787844510A1EBDD0086103D /* knn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787844410A1EBDD0086103D /* knn.cpp */; };
                A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; };
                A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
                A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
+               A773805D10B6D6FC002A6A61 /* taxonomyequalizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomyequalizer.h; sourceTree = SOURCE_ROOT; };
+               A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomyequalizer.cpp; sourceTree = SOURCE_ROOT; };
+               A773808E10B6EFD1002A6A61 /* phylotypecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotypecommand.h; sourceTree = SOURCE_ROOT; };
+               A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotypecommand.cpp; sourceTree = SOURCE_ROOT; };
                A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = SOURCE_ROOT; };
                A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = SOURCE_ROOT; };
                A78780FE10A06B8B0086103D /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classify.h; sourceTree = SOURCE_ROOT; };
                A787810210A06D5D0086103D /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classify.cpp; sourceTree = SOURCE_ROOT; };
-               A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = SOURCE_ROOT; };
-               A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = SOURCE_ROOT; };
+               A78781B810A0813D0086103D /* phylotree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotree.cpp; sourceTree = SOURCE_ROOT; };
+               A78781B910A0813D0086103D /* phylotree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotree.h; sourceTree = SOURCE_ROOT; };
                A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = SOURCE_ROOT; };
                A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = SOURCE_ROOT; };
                A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = SOURCE_ROOT; };
                                375873F60F7D649C0040F377 /* nocommands.cpp */,
                                3792946E0F2E191800B9034A /* parsimonycommand.h */,
                                3792946F0F2E191800B9034A /* parsimonycommand.cpp */,
+                               A773808E10B6EFD1002A6A61 /* phylotypecommand.h */,
+                               A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */,
                                37D927FE0F21331F001D4494 /* quitcommand.h */,
                                37D927FD0F21331F001D4494 /* quitcommand.cpp */,
                                37D928080F21331F001D4494 /* rarefactcommand.h */,
                                A787810210A06D5D0086103D /* classify.cpp */,
                                A787820F10A0AAE70086103D /* bayesian.h */,
                                A787821010A0AAE70086103D /* bayesian.cpp */,
-                               A78781B910A0813D0086103D /* doTaxonomy.h */,
-                               A78781B810A0813D0086103D /* doTaxonomy.cpp */,
+                               A773805D10B6D6FC002A6A61 /* taxonomyequalizer.h */,
+                               A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */,
+                               A78781B910A0813D0086103D /* phylotree.h */,
+                               A78781B810A0813D0086103D /* phylotree.cpp */,
                                A787844310A1EBDD0086103D /* knn.h */,
                                A787844410A1EBDD0086103D /* knn.cpp */,
                        );
                                A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */,
                                A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */,
                                A787810310A06D5D0086103D /* classify.cpp in Sources */,
-                               A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */,
+                               A78781BA10A0813E0086103D /* phylotree.cpp in Sources */,
                                A787821110A0AAE70086103D /* bayesian.cpp in Sources */,
                                A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */,
                                A787844510A1EBDD0086103D /* knn.cpp in Sources */,
+                               A773805F10B6D6FC002A6A61 /* taxonomyequalizer.cpp in Sources */,
+                               A773809010B6EFD1002A6A61 /* phylotypecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index f13224a14fe456d8acc0a868047145492b9e62e6..a8978732af92688dfddc2ede9d1d5c13c636ea06 100644 (file)
@@ -15,7 +15,7 @@
 
 #include "mothur.h"
 #include "database.hpp"
-#include "doTaxonomy.h"
+#include "phylotree.h"
 
 
 class Sequence;
index 40bf93817976d1a81a12c9e5604ca470b8bf33cb..d2f8fc748f3064bd03a525ce6d9e6bb7895d55fe 100644 (file)
 #include "classifyseqscommand.h"
 #include "sequence.hpp"
 #include "bayesian.h"
-#include "doTaxonomy.h"
+#include "phylotree.h"
 #include "knn.h"
 
 //**********************************************************************************************************************
 
 ClassifySeqsCommand::ClassifySeqsCommand(string option){
        try {
-               //              globaldata = GlobalData::getInstance();
                abort = false;
                
                //allow user to run help
index 030079ed21e3a6d4f35263f45d46a67878715e5b..ed1801a8bd58bb46fc9afc5e714072547ae4305f 100644 (file)
@@ -59,6 +59,7 @@
 #include "getlistcountcommand.h"
 #include "hclustercommand.h"
 #include "classifyseqscommand.h"
+#include "phylotypecommand.h"
 
 /***********************************************************/
 
@@ -117,6 +118,7 @@ CommandFactory::CommandFactory(){
        commands["quit"]                                = "quit"; 
        commands["hcluster"]                    = "hcluster"; 
        commands["classify.seqs"]               = "classify.seqs"; 
+       commands["phylotype"]                   = "phylotype";
 
 }
 /***********************************************************/
@@ -183,6 +185,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "get.listcount")                 {       command = new GetListCountCommand(optionString);                }
                else if(commandName == "hcluster")                              {       command = new HClusterCommand(optionString);                    }
                else if(commandName == "classify.seqs")                 {       command = new ClassifySeqsCommand(optionString);                }
+               else if(commandName == "phylotype")                             {       command = new PhylotypeCommand(optionString);                   }
                else                                                                                    {       command = new NoCommand(optionString);                                  }
 
                return command;
diff --git a/doTaxonomy.cpp b/doTaxonomy.cpp
deleted file mode 100644 (file)
index 40869b9..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- *  doTaxonomy.cpp
- *  
- *
- *  Created by Pat Schloss on 6/17/09.
- *  Copyright 2009 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "doTaxonomy.h"
-
-/**************************************************************************************************/
-
-PhyloTree::PhyloTree(){
-       try {
-               numNodes = 1;
-               numSeqs = 0;
-               tree.push_back(TaxNode("Root"));
-               tree[0].heirarchyID = "0";
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "PhyloTree");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-string PhyloTree::getNextTaxon(string& heirarchy){
-       try {
-               string currentLevel = "";
-               if(heirarchy != ""){
-                       currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
-                       heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
-               }
-               return currentLevel;
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "getNextTaxon");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
-       try {
-               numSeqs++;
-               
-               map<string, int>::iterator childPointer;
-               
-               int currentNode = 0;
-               int level = 1;
-               
-               tree[0].accessions.push_back(seqName);
-               string taxon;// = getNextTaxon(seqTaxonomy);
-               
-               while(seqTaxonomy != ""){
-                       
-                       level++;
-                       
-                       //somehow the parent is getting one too many accnos
-                       //use print to reassign the taxa id
-                       taxon = getNextTaxon(seqTaxonomy);
-                       
-                       childPointer = tree[currentNode].children.find(taxon);
-                       
-                       if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
-                               currentNode = childPointer->second;
-                               tree[currentNode].accessions.push_back(seqName);
-                               name2Taxonomy[seqName] = currentNode;
-                       }
-                       else{                                                                                   //otherwise, create it
-                               tree.push_back(TaxNode(taxon));
-                               numNodes++;
-                               tree[currentNode].children[taxon] = numNodes-1;
-                               tree[numNodes-1].parent = currentNode;
-                               
-                               //                      int numChildren = tree[currentNode].children.size();
-                               //                      string heirarchyID = tree[currentNode].heirarchyID;
-                               //                      tree[currentNode].accessions.push_back(seqName);
-                               
-                               currentNode = tree[currentNode].children[taxon];
-                               tree[currentNode].accessions.push_back(seqName);
-                               name2Taxonomy[seqName] = currentNode;
-                               //                      tree[currentNode].level = level;
-                               //                      tree[currentNode].childNumber = numChildren;
-                               //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
-                       }
-               
-                       if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
-               }
-
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "addSeqToTree");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-vector<int> PhyloTree::getGenusNodes() {
-       try {
-               genusIndex.clear();
-               //generate genusIndexes
-               map<int, int>::iterator it2;
-               for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
-               
-               return genusIndex;
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "getGenusNodes");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-
-void PhyloTree::assignHeirarchyIDs(int index){
-       try {
-               map<string,int>::iterator it;
-               int counter = 1;
-               
-               for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
-                       tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
-                       counter++;
-                       tree[it->second].level = tree[index].level + 1;
-                       assignHeirarchyIDs(it->second);
-               }
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "assignHeirarchyIDs");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void PhyloTree::print(ofstream& out){
-       try {
-               out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
-               print(0, out);
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "print");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void PhyloTree::print(int i, ofstream& out){
-       try {
-               map<string,int>::iterator it;
-               for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
-                       out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
-                       print(it->second, out);
-               }
-       }
-       catch(exception& e) {
-               errorOut(e, "PhyloTree", "print");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-
-       
diff --git a/doTaxonomy.h b/doTaxonomy.h
deleted file mode 100644 (file)
index 6f83f19..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef DOTAXONOMY_H
-#define DOTAXONOMY_H
-
-/*
- *  doTaxonomy.h
- *  
- *
- *  Created by Pat Schloss on 6/17/09.
- *  Copyright 2009 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "mothur.h"
-
-/**************************************************************************************************/
-
-struct TaxNode {
-       vector<string> accessions;      //names of seqs in this branch of tree
-       map<string, int> children;  //childs name to index in tree
-       int parent, childNumber, level;
-       string name, heirarchyID;
-       
-       TaxNode(string n) : name(n), level(0), parent(-1) {             }
-       TaxNode(){}
-};
-
-/**************************************************************************************************/
-
-class PhyloTree {
-
-public:
-       PhyloTree();
-       ~PhyloTree() {};
-       void addSeqToTree(string, string);
-       void assignHeirarchyIDs(int);
-       void print(ofstream&);
-       vector<int> getGenusNodes();    
-       TaxNode get(int i)                              {       return tree[i]; }
-       TaxNode get(string seqName)             {       return tree[name2Taxonomy[seqName]];    }
-       int getIndex(string seqName)    {       return name2Taxonomy[seqName];  }
-       string getName(int i)                   {       return tree[i].name;    }
-private:
-       string getNextTaxon(string&);
-       vector<TaxNode> tree;
-       vector<int> genusIndex; //holds the indexes in tree where the genus level taxonomies are stored
-       map<string, int> name2Taxonomy;  //maps name to index in tree
-       map<int, int> uniqueTaxonomies;  //map of unique taxonomies
-       void print(int, ofstream&);
-       int numNodes;
-       int numSeqs;
-};
-
-/**************************************************************************************************/
-
-#endif
-
-
diff --git a/phylotree.cpp b/phylotree.cpp
new file mode 100644 (file)
index 0000000..2a44192
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ *  doTaxonomy.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 6/17/09.
+ *  Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "phylotree.h"
+
+/**************************************************************************************************/
+
+PhyloTree::PhyloTree(){
+       try {
+               numNodes = 1;
+               numSeqs = 0;
+               tree.push_back(TaxNode("Root"));
+               tree[0].heirarchyID = "0";
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "PhyloTree");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+PhyloTree::PhyloTree(string tfile){
+       try {
+               numNodes = 1;
+               numSeqs = 0;
+               tree.push_back(TaxNode("Root"));
+               tree[0].heirarchyID = "0";
+               
+               ifstream in;
+               openInputFile(tfile, in);
+               
+               //read in users taxonomy file and add sequences to tree
+               string name, tax;
+               while(!in.eof()){
+                       in >> name >> tax; gobble(in);
+                       
+                       addSeqToTree(name, tax);
+               }
+               in.close();
+       
+               assignHeirarchyIDs(0);
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "PhyloTree");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+string PhyloTree::getNextTaxon(string& heirarchy){
+       try {
+               string currentLevel = "";
+               if(heirarchy != ""){
+                       currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
+                       heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
+               }
+               return currentLevel;
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "getNextTaxon");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
+       try {
+               numSeqs++;
+               
+               map<string, int>::iterator childPointer;
+               
+               int currentNode = 0;
+               int level = 1;
+               
+               tree[0].accessions.push_back(seqName);
+               string taxon;// = getNextTaxon(seqTaxonomy);
+               
+               while(seqTaxonomy != ""){
+                       
+                       level++;
+                       
+                       //somehow the parent is getting one too many accnos
+                       //use print to reassign the taxa id
+                       taxon = getNextTaxon(seqTaxonomy);
+                       
+                       childPointer = tree[currentNode].children.find(taxon);
+                       
+                       if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
+                               currentNode = childPointer->second;
+                               tree[currentNode].accessions.push_back(seqName);
+                               name2Taxonomy[seqName] = currentNode;
+                       }
+                       else{                                                                                   //otherwise, create it
+                               tree.push_back(TaxNode(taxon));
+                               numNodes++;
+                               tree[currentNode].children[taxon] = numNodes-1;
+                               tree[numNodes-1].parent = currentNode;
+                               
+                               //                      int numChildren = tree[currentNode].children.size();
+                               //                      string heirarchyID = tree[currentNode].heirarchyID;
+                               //                      tree[currentNode].accessions.push_back(seqName);
+                               
+                               currentNode = tree[currentNode].children[taxon];
+                               tree[currentNode].accessions.push_back(seqName);
+                               name2Taxonomy[seqName] = currentNode;
+                               //                      tree[currentNode].level = level;
+                               //                      tree[currentNode].childNumber = numChildren;
+                               //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
+                       }
+               
+                       if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "addSeqToTree");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<int> PhyloTree::getGenusNodes() {
+       try {
+               genusIndex.clear();
+               //generate genusIndexes
+               map<int, int>::iterator it2;
+               for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
+               
+               return genusIndex;
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "getGenusNodes");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+void PhyloTree::assignHeirarchyIDs(int index){
+       try {
+               map<string,int>::iterator it;
+               int counter = 1;
+               
+               for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+                       tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
+                       counter++;
+                       tree[it->second].level = tree[index].level + 1;
+                       assignHeirarchyIDs(it->second);
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "assignHeirarchyIDs");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::print(ofstream& out){
+       try {
+               out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
+               print(0, out);
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "print");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void PhyloTree::print(int i, ofstream& out){
+       try {
+               map<string,int>::iterator it;
+               for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+                       out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
+                       print(it->second, out);
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "PhyloTree", "print");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+
+       
diff --git a/phylotree.h b/phylotree.h
new file mode 100644 (file)
index 0000000..f8365d5
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef DOTAXONOMY_H
+#define DOTAXONOMY_H
+
+/*
+ * phylotree.h
+ *  
+ *
+ *  Created by Pat Schloss on 6/17/09.
+ *  Copyright 2009 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+struct TaxNode {
+       vector<string> accessions;      //names of seqs in this branch of tree
+       map<string, int> children;  //childs name to index in tree
+       int parent, childNumber, level;
+       string name, heirarchyID;
+       
+       TaxNode(string n) : name(n), level(0), parent(-1) {             }
+       TaxNode(){}
+};
+
+/**************************************************************************************************/
+
+class PhyloTree {
+
+public:
+       PhyloTree();
+       PhyloTree(string);  //pass it a taxonomy file and it makes the tree
+       ~PhyloTree() {};
+       void addSeqToTree(string, string);
+       void assignHeirarchyIDs(int);
+       void print(ofstream&);
+       vector<int> getGenusNodes();    
+       TaxNode get(int i)                              {       return tree[i]; }
+       TaxNode get(string seqName)             {       return tree[name2Taxonomy[seqName]];    }
+       int getIndex(string seqName)    {       return name2Taxonomy[seqName];  }
+       string getName(int i)                   {       return tree[i].name;    }
+private:
+       string getNextTaxon(string&);
+       vector<TaxNode> tree;
+       vector<int> genusIndex; //holds the indexes in tree where the genus level taxonomies are stored
+       map<string, int> name2Taxonomy;  //maps name to index in tree
+       map<int, int> uniqueTaxonomies;  //map of unique taxonomies
+       void print(int, ofstream&);
+       int numNodes;
+       int numSeqs;
+};
+
+/**************************************************************************************************/
+
+#endif
+
+
diff --git a/phylotypecommand.cpp b/phylotypecommand.cpp
new file mode 100644 (file)
index 0000000..ef87282
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ *  phylotypecommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/20/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "phylotypecommand.h"
+#include "phylotree.h"
+#include "listvector.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+
+/**********************************************************************************************************************/
+PhylotypeCommand::PhylotypeCommand(string option){
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       
+                       //valid paramters for this command
+                       string AlignArray[] =  {"taxonomy","cutoff","label"};
+                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters(); 
+                       
+                       ValidParameters validParameter;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxonomyFileName == "not found") { 
+                               mothurOut("taxonomy is a required parameter for the phylotype command."); 
+                               mothurOutEndLine();
+                               abort = true; 
+                       }else if (taxonomyFileName == "not open") { abort = true; }     
+                       
+                       string temp = validParameter.validFile(parameters, "cutoff", false);
+                       if (temp == "not found") { temp = "-1"; }
+                       convert(temp, cutoff); 
+                       
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; allLines = 1; }
+                       else { 
+                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+
+void PhylotypeCommand::help(){
+       try {
+               mothurOut("The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n");
+               mothurOut("The phylotype command parameter options are taxonomy, cutoff and label. The taxonomy parameter is required.\n");
+               mothurOut("The cutoff parameter allows you to specify the level you want to stop at.  The default is the highest level in your taxonomy file. \n");
+               mothurOut("The label parameter allows you to specify which level you would like, and are separated by dashes.  The default all levels in your taxonomy file. \n");
+               mothurOut("The phylotype command should be in the following format: \n");
+               mothurOut("phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n");
+               mothurOut("Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n\n");
+       }
+       catch(exception& e) {
+               errorOut(e, "PhylotypeCommand", "help");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+
+PhylotypeCommand::~PhylotypeCommand(){}
+
+/**********************************************************************************************************************/
+
+int PhylotypeCommand::execute(){
+       try {
+       
+               if (abort == true) { return 0; }
+               
+               //reads in taxonomy file and makes all the taxonomies the same length 
+               //by appending the last taxon to a given taxonomy as many times as needed to 
+               //make it as long as the longest taxonomy in the file 
+               TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff);
+               
+               string equalizedTaxFile = taxEqual->getEqualizedTaxFile();
+               
+               delete taxEqual;
+               
+               //build taxonomy tree from equalized file
+               PhyloTree* tree = new PhyloTree(equalizedTaxFile);
+               vector<int> leaves = tree->getGenusNodes();
+               
+               //store leaf nodes in current map
+               for (int i = 0; i < leaves.size(); i++)         {       currentNodes[leaves[i]] = leaves[i];    }
+               
+               bool done = false;
+               if (tree->get(leaves[0]).parent == -1) {  mothurOut("Empty Tree"); mothurOutEndLine();  done = true;    }
+               
+               ofstream outList;
+               string outputListFile = getRootName(taxonomyFileName) + "tax.list";
+               openOutputFile(outputListFile, outList);
+               ofstream outSabund;
+               string outputSabundFile = getRootName(taxonomyFileName) + "tax.sabund";
+               openOutputFile(outputSabundFile, outSabund);
+               ofstream outRabund;
+               string outputRabundFile = getRootName(taxonomyFileName) + "tax.rabund";
+               openOutputFile(outputRabundFile, outRabund);
+               
+                               
+               //start at leaves of tree and work towards root, processing the labels the user wants
+               while((!done) && ((allLines == 1) || (labels.size() != 0))) {
+               
+                       string level = toString(tree->get(currentNodes.begin()->first).level); 
+                       
+                       //is this a level the user want output for
+                       if(allLines == 1 || labels.count(level) == 1){  
+                               
+                               //output level
+                               mothurOut(level); mothurOutEndLine();
+                               
+                               ListVector list;
+                               list.setLabel(level);
+                               //go through nodes and build listvector 
+                               for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
+                       
+                                       //get parents
+                                       TaxNode node = tree->get(itCurrent->first);
+                                       parentNodes[node.parent] = node.parent;
+                                       
+                                       vector<string> names = node.accessions;
+                                       
+                                       //make the names compatable with listvector
+                                       string name = "";
+                                       for (int i = 0; i < names.size(); i++) {  name += names[i] + ",";       }
+                                       name = name.substr(0, name.length()-1);  //rip off extra ','
+                                       
+                                       //add bin to list vector
+                                       list.push_back(name);
+                               }       
+                               
+                               //print listvector
+                               list.print(outList);
+                               //print rabund
+                               list.getRAbundVector().print(outRabund);
+                               //print sabund
+                               list.getSAbundVector().print(outSabund);
+                       
+                               labels.erase(level);
+                               
+                       }else {
+                               
+                               //just get parents
+                               for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
+                                       int parent = tree->get(itCurrent->first).parent;
+                                       parentNodes[parent] = parent;
+                               }
+                       }
+                       
+                       //move up a level
+                       currentNodes = parentNodes;
+                       parentNodes.clear();
+                       
+                       //have we reached the rootnode
+                       if (tree->get(currentNodes.begin()->first).parent == -1)  {  done = true;  }
+               }
+                       
+               outList.close();
+               outSabund.close();
+               outRabund.close();      
+               
+               delete tree;
+               
+               return 0;               
+       }
+
+       catch(exception& e) {
+               errorOut(e, "PhylotypeCommand", "execute");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
diff --git a/phylotypecommand.h b/phylotypecommand.h
new file mode 100644 (file)
index 0000000..0322daf
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef PHYLOTYPECOMMAND_H
+#define PHYLOTYPECOMMAND_H
+
+
+/*
+ *  phylotypecommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/20/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "taxonomyequalizer.h"
+#include "command.hpp"
+
+/*************************************************************************/
+
+class PhylotypeCommand : public Command {
+       
+public:
+       PhylotypeCommand(string);       
+       ~PhylotypeCommand();
+       int execute(); 
+       void help();    
+       
+private:
+       bool abort, allLines;
+       string taxonomyFileName, label;
+       set<string> labels; //holds labels to be used
+       int cutoff;
+       
+       map<int, int> currentNodes;
+       map<int, int> parentNodes;
+       map<int, int>::iterator itCurrent;
+
+};
+
+
+/*************************************************************************/
+
+
+#endif
+
+
+
diff --git a/taxonomyequalizer.cpp b/taxonomyequalizer.cpp
new file mode 100644 (file)
index 0000000..37b7720
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ *  taxonomyequalizer.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/20/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "taxonomyequalizer.h"
+
+/**************************************************************************************************/
+TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) {
+       try {
+               containsConfidence = false;
+               
+               ifstream inTax;
+               openInputFile(tfile, inTax);
+       
+               int highestLevel = getHighestLevel(inTax);
+       
+               //if the user has specified a cutoff and it's smaller than the highest level
+               if ((cutoff != -1) && (cutoff < highestLevel)) { 
+                       highestLevel = cutoff;
+               }else if (cutoff > highestLevel) {
+                       mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
+                       mothurOutEndLine();
+               }
+               
+               inTax.close();  
+               openInputFile(tfile, inTax);
+               
+               ofstream out;
+               equalizedFile = getRootName(tfile) + "equalized.taxonomy";
+               openOutputFile(equalizedFile, out);
+               
+               string name, tax;
+               while (inTax) {
+                       inTax >> name >> tax;   gobble(inTax);
+                       
+                       if (containsConfidence) {  removeConfidences(tax);      }
+                       
+                       //is this a taxonomy that needs to be extended?
+                       if (seqLevels[name] < highestLevel) {
+                               extendTaxonomy(name, tax, highestLevel);
+                       }else if (seqLevels[name] > highestLevel) { //this can happen if hte user enters a cutoff
+                               truncateTaxonomy(name, tax, highestLevel);
+                       }
+                       
+                       out << name << '\t' << tax << endl;
+               }
+               
+               inTax.close();
+               out.close();
+                                       
+       }
+       catch(exception& e) {
+               errorOut(e, "TaxEqualizer", "TaxEqualizer");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int TaxEqualizer::getHighestLevel(ifstream& in) {
+       try {
+               
+               int level = 0;
+               string name, tax;
+               
+               while (in) {
+                       in >> name >> tax;   gobble(in);
+               
+                       //count levels in this taxonomy
+                       int thisLevel = 0;
+                       for (int i = 0; i < tax.length(); i++) {
+                               if (tax[i] == ';') {  thisLevel++;      }
+                       }
+               
+                       //save sequences level
+                       seqLevels[name] = thisLevel;
+               
+                       //is this the longest taxonomy?
+                       if (thisLevel > level) {  
+                               level = thisLevel;  
+                               testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
+                       }  
+               }
+               
+               int pos = testTax.find_first_of('(');
+
+               //if there are '(' then there are confidences we need to take out
+               if (pos != -1) {  containsConfidence = true;  }
+               
+               return level;
+                                       
+       }
+       catch(exception& e) {
+               errorOut(e, "TaxEqualizer", "getHighestLevel");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
+       try {
+                       
+               //get last taxon
+               tax = tax.substr(0, tax.length()-1);  //take off final ";"
+               int pos = tax.find_last_of(';');
+               string lastTaxon = tax.substr(pos+1);  
+               lastTaxon += ";"; //add back on delimiting char
+               tax += ";";
+               
+               int currentLevel = seqLevels[name];
+               
+               //added last taxon until you get desired level
+               for (int i = currentLevel; i < desiredLevel; i++) {
+                       tax += lastTaxon;
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "TaxEqualizer", "extendTaxonomy");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
+       try {
+                       
+               int currentLevel = seqLevels[name];
+               tax = tax.substr(0, tax.length()-1);  //take off final ";"
+               
+               //remove a taxon until you get to desired level
+               for (int i = currentLevel; i > desiredLevel; i--) {
+                       tax = tax.substr(0,  tax.find_last_of(';'));
+               }
+               
+               tax += ";";
+       }
+       catch(exception& e) {
+               errorOut(e, "TaxEqualizer", "truncateTaxonomy");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+void TaxEqualizer::removeConfidences(string& tax) {
+       try {
+               
+               string taxon;
+               string newTax = "";
+               
+               while (tax.find_first_of(';') != -1) {
+                       //get taxon
+                       taxon = tax.substr(0,tax.find_first_of(';'));
+                       taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
+                       taxon += ";";
+                       
+                       tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+                       newTax += taxon;
+               }
+               
+               tax = newTax;
+       }
+       catch(exception& e) {
+               errorOut(e, "TaxEqualizer", "removeConfidences");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
diff --git a/taxonomyequalizer.h b/taxonomyequalizer.h
new file mode 100644 (file)
index 0000000..6308d71
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef TAXONOMYEQUALIZER_H
+#define TAXONOMYEQUALIZER_H
+
+
+/*
+ *  taxonomyequalizer.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/20/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+
+//reads in taxonomy file and makes all the taxonomies the same length 
+//by appending the last taxon to a given taxonomy as many times as needed to 
+//make it as long as the longest taxonomy in the file 
+
+/**************************************************************************************************/
+
+class TaxEqualizer  {
+       
+public:
+       TaxEqualizer(string, int);
+       ~TaxEqualizer() {};
+       
+       string getEqualizedTaxFile()    {  return equalizedFile;        }
+       
+       
+private:
+       string equalizedFile, testTax;
+       bool containsConfidence;
+       int cutoff;
+       map<string, int> seqLevels;  //maps name to level of taxonomy
+       
+       int getHighestLevel(ifstream&);  //scans taxonomy file to find taxonomy with highest level
+       void extendTaxonomy(string, string&, int);  //name, taxonomy, desired level
+       void truncateTaxonomy(string, string&, int);  //name, taxonomy, desired level
+       void removeConfidences(string&);  //removes the confidence limits on the taxon 
+
+       
+};
+
+/**************************************************************************************************/
+
+
+#endif
+
+