From 7a2154809d332281cf4006943a9bd94b8208c837 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 23 Nov 2009 17:27:06 +0000 Subject: [PATCH] added phylotype command which reads a taxonomy file and creates a .list, .rabund and .sabund file. --- Mothur.xcodeproj/project.pbxproj | 24 +++- classify.h | 2 +- classifyseqscommand.cpp | 3 +- commandfactory.cpp | 3 + doTaxonomy.cpp => phylotree.cpp | 30 ++++- doTaxonomy.h => phylotree.h | 3 +- phylotypecommand.cpp | 194 +++++++++++++++++++++++++++++++ phylotypecommand.h | 46 ++++++++ taxonomyequalizer.cpp | 169 +++++++++++++++++++++++++++ taxonomyequalizer.h | 50 ++++++++ 10 files changed, 513 insertions(+), 11 deletions(-) rename doTaxonomy.cpp => phylotree.cpp (88%) rename doTaxonomy.h => phylotree.h (94%) create mode 100644 phylotypecommand.cpp create mode 100644 phylotypecommand.h create mode 100644 taxonomyequalizer.cpp create mode 100644 taxonomyequalizer.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 977aee9..5dc0ea9 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -168,9 +168,11 @@ A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; }; A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A751ACE91098B283003D0911 /* readcluster.cpp */; }; A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; }; + A773805F10B6D6FC002A6A61 /* taxonomyequalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */; }; + A773809010B6EFD1002A6A61 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */; }; A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */; }; A787810310A06D5D0086103D /* classify.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787810210A06D5D0086103D /* classify.cpp */; }; - A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* doTaxonomy.cpp */; }; + A78781BA10A0813E0086103D /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78781B810A0813D0086103D /* phylotree.cpp */; }; A787821110A0AAE70086103D /* bayesian.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787821010A0AAE70086103D /* bayesian.cpp */; }; A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A78782AA10A1B1CB0086103D /* alignmentdb.cpp */; }; A787844510A1EBDD0086103D /* knn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A787844410A1EBDD0086103D /* knn.cpp */; }; @@ -541,12 +543,16 @@ A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; }; A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; }; A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; }; + A773805D10B6D6FC002A6A61 /* taxonomyequalizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomyequalizer.h; sourceTree = SOURCE_ROOT; }; + A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomyequalizer.cpp; sourceTree = SOURCE_ROOT; }; + A773808E10B6EFD1002A6A61 /* phylotypecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotypecommand.h; sourceTree = SOURCE_ROOT; }; + A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotypecommand.cpp; sourceTree = SOURCE_ROOT; }; A7861476109F5FA00010AAAF /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifyseqscommand.h; sourceTree = SOURCE_ROOT; }; A7861477109F5FA00010AAAF /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifyseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A78780FE10A06B8B0086103D /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classify.h; sourceTree = SOURCE_ROOT; }; A787810210A06D5D0086103D /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classify.cpp; sourceTree = SOURCE_ROOT; }; - A78781B810A0813D0086103D /* doTaxonomy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = doTaxonomy.cpp; sourceTree = SOURCE_ROOT; }; - A78781B910A0813D0086103D /* doTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = doTaxonomy.h; sourceTree = SOURCE_ROOT; }; + A78781B810A0813D0086103D /* phylotree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylotree.cpp; sourceTree = SOURCE_ROOT; }; + A78781B910A0813D0086103D /* phylotree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylotree.h; sourceTree = SOURCE_ROOT; }; A787820F10A0AAE70086103D /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bayesian.h; sourceTree = SOURCE_ROOT; }; A787821010A0AAE70086103D /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bayesian.cpp; sourceTree = SOURCE_ROOT; }; A78782A910A1B1CB0086103D /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentdb.h; sourceTree = SOURCE_ROOT; }; @@ -909,6 +915,8 @@ 375873F60F7D649C0040F377 /* nocommands.cpp */, 3792946E0F2E191800B9034A /* parsimonycommand.h */, 3792946F0F2E191800B9034A /* parsimonycommand.cpp */, + A773808E10B6EFD1002A6A61 /* phylotypecommand.h */, + A773808F10B6EFD1002A6A61 /* phylotypecommand.cpp */, 37D927FE0F21331F001D4494 /* quitcommand.h */, 37D927FD0F21331F001D4494 /* quitcommand.cpp */, 37D928080F21331F001D4494 /* rarefactcommand.h */, @@ -1035,8 +1043,10 @@ A787810210A06D5D0086103D /* classify.cpp */, A787820F10A0AAE70086103D /* bayesian.h */, A787821010A0AAE70086103D /* bayesian.cpp */, - A78781B910A0813D0086103D /* doTaxonomy.h */, - A78781B810A0813D0086103D /* doTaxonomy.cpp */, + A773805D10B6D6FC002A6A61 /* taxonomyequalizer.h */, + A773805E10B6D6FC002A6A61 /* taxonomyequalizer.cpp */, + A78781B910A0813D0086103D /* phylotree.h */, + A78781B810A0813D0086103D /* phylotree.cpp */, A787844310A1EBDD0086103D /* knn.h */, A787844410A1EBDD0086103D /* knn.cpp */, ); @@ -1267,10 +1277,12 @@ A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */, A7861478109F5FA00010AAAF /* classifyseqscommand.cpp in Sources */, A787810310A06D5D0086103D /* classify.cpp in Sources */, - A78781BA10A0813E0086103D /* doTaxonomy.cpp in Sources */, + A78781BA10A0813E0086103D /* phylotree.cpp in Sources */, A787821110A0AAE70086103D /* bayesian.cpp in Sources */, A78782AB10A1B1CB0086103D /* alignmentdb.cpp in Sources */, A787844510A1EBDD0086103D /* knn.cpp in Sources */, + A773805F10B6D6FC002A6A61 /* taxonomyequalizer.cpp in Sources */, + A773809010B6EFD1002A6A61 /* phylotypecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/classify.h b/classify.h index f13224a..a897873 100644 --- a/classify.h +++ b/classify.h @@ -15,7 +15,7 @@ #include "mothur.h" #include "database.hpp" -#include "doTaxonomy.h" +#include "phylotree.h" class Sequence; diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 40bf938..d2f8fc7 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -10,14 +10,13 @@ #include "classifyseqscommand.h" #include "sequence.hpp" #include "bayesian.h" -#include "doTaxonomy.h" +#include "phylotree.h" #include "knn.h" //********************************************************************************************************************** ClassifySeqsCommand::ClassifySeqsCommand(string option){ try { - // globaldata = GlobalData::getInstance(); abort = false; //allow user to run help diff --git a/commandfactory.cpp b/commandfactory.cpp index 030079e..ed1801a 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -59,6 +59,7 @@ #include "getlistcountcommand.h" #include "hclustercommand.h" #include "classifyseqscommand.h" +#include "phylotypecommand.h" /***********************************************************/ @@ -117,6 +118,7 @@ CommandFactory::CommandFactory(){ commands["quit"] = "quit"; commands["hcluster"] = "hcluster"; commands["classify.seqs"] = "classify.seqs"; + commands["phylotype"] = "phylotype"; } /***********************************************************/ @@ -183,6 +185,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "get.listcount") { command = new GetListCountCommand(optionString); } else if(commandName == "hcluster") { command = new HClusterCommand(optionString); } else if(commandName == "classify.seqs") { command = new ClassifySeqsCommand(optionString); } + else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/doTaxonomy.cpp b/phylotree.cpp similarity index 88% rename from doTaxonomy.cpp rename to phylotree.cpp index 40869b9..2a44192 100644 --- a/doTaxonomy.cpp +++ b/phylotree.cpp @@ -7,7 +7,7 @@ * */ -#include "doTaxonomy.h" +#include "phylotree.h" /**************************************************************************************************/ @@ -23,6 +23,34 @@ PhyloTree::PhyloTree(){ exit(1); } } +/**************************************************************************************************/ + +PhyloTree::PhyloTree(string tfile){ + try { + numNodes = 1; + numSeqs = 0; + tree.push_back(TaxNode("Root")); + tree[0].heirarchyID = "0"; + + ifstream in; + openInputFile(tfile, in); + + //read in users taxonomy file and add sequences to tree + string name, tax; + while(!in.eof()){ + in >> name >> tax; gobble(in); + + addSeqToTree(name, tax); + } + in.close(); + + assignHeirarchyIDs(0); + } + catch(exception& e) { + errorOut(e, "PhyloTree", "PhyloTree"); + exit(1); + } +} /**************************************************************************************************/ diff --git a/doTaxonomy.h b/phylotree.h similarity index 94% rename from doTaxonomy.h rename to phylotree.h index 6f83f19..f8365d5 100644 --- a/doTaxonomy.h +++ b/phylotree.h @@ -2,7 +2,7 @@ #define DOTAXONOMY_H /* - * doTaxonomy.h + * phylotree.h * * * Created by Pat Schloss on 6/17/09. @@ -30,6 +30,7 @@ class PhyloTree { public: PhyloTree(); + PhyloTree(string); //pass it a taxonomy file and it makes the tree ~PhyloTree() {}; void addSeqToTree(string, string); void assignHeirarchyIDs(int); diff --git a/phylotypecommand.cpp b/phylotypecommand.cpp new file mode 100644 index 0000000..ef87282 --- /dev/null +++ b/phylotypecommand.cpp @@ -0,0 +1,194 @@ +/* + * phylotypecommand.cpp + * Mothur + * + * Created by westcott on 11/20/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "phylotypecommand.h" +#include "phylotree.h" +#include "listvector.hpp" +#include "rabundvector.hpp" +#include "sabundvector.hpp" + +/**********************************************************************************************************************/ +PhylotypeCommand::PhylotypeCommand(string option){ + try { + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + + //valid paramters for this command + string AlignArray[] = {"taxonomy","cutoff","label"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true); + if (taxonomyFileName == "not found") { + mothurOut("taxonomy is a required parameter for the phylotype command."); + mothurOutEndLine(); + abort = true; + }else if (taxonomyFileName == "not open") { abort = true; } + + string temp = validParameter.validFile(parameters, "cutoff", false); + if (temp == "not found") { temp = "-1"; } + convert(temp, cutoff); + + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; allLines = 1; } + else { + if(label != "all") { splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + } + } + catch(exception& e) { + errorOut(e, "PhylotypeCommand", "PhylotypeCommand"); + exit(1); + } +} +/**********************************************************************************************************************/ + +void PhylotypeCommand::help(){ + try { + mothurOut("The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n"); + mothurOut("The phylotype command parameter options are taxonomy, cutoff and label. The taxonomy parameter is required.\n"); + mothurOut("The cutoff parameter allows you to specify the level you want to stop at. The default is the highest level in your taxonomy file. \n"); + mothurOut("The label parameter allows you to specify which level you would like, and are separated by dashes. The default all levels in your taxonomy file. \n"); + mothurOut("The phylotype command should be in the following format: \n"); + mothurOut("phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n"); + mothurOut("Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n\n"); + } + catch(exception& e) { + errorOut(e, "PhylotypeCommand", "help"); + exit(1); + } +} +/**********************************************************************************************************************/ + +PhylotypeCommand::~PhylotypeCommand(){} + +/**********************************************************************************************************************/ + +int PhylotypeCommand::execute(){ + try { + + if (abort == true) { return 0; } + + //reads in taxonomy file and makes all the taxonomies the same length + //by appending the last taxon to a given taxonomy as many times as needed to + //make it as long as the longest taxonomy in the file + TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff); + + string equalizedTaxFile = taxEqual->getEqualizedTaxFile(); + + delete taxEqual; + + //build taxonomy tree from equalized file + PhyloTree* tree = new PhyloTree(equalizedTaxFile); + vector leaves = tree->getGenusNodes(); + + //store leaf nodes in current map + for (int i = 0; i < leaves.size(); i++) { currentNodes[leaves[i]] = leaves[i]; } + + bool done = false; + if (tree->get(leaves[0]).parent == -1) { mothurOut("Empty Tree"); mothurOutEndLine(); done = true; } + + ofstream outList; + string outputListFile = getRootName(taxonomyFileName) + "tax.list"; + openOutputFile(outputListFile, outList); + ofstream outSabund; + string outputSabundFile = getRootName(taxonomyFileName) + "tax.sabund"; + openOutputFile(outputSabundFile, outSabund); + ofstream outRabund; + string outputRabundFile = getRootName(taxonomyFileName) + "tax.rabund"; + openOutputFile(outputRabundFile, outRabund); + + + //start at leaves of tree and work towards root, processing the labels the user wants + while((!done) && ((allLines == 1) || (labels.size() != 0))) { + + string level = toString(tree->get(currentNodes.begin()->first).level); + + //is this a level the user want output for + if(allLines == 1 || labels.count(level) == 1){ + + //output level + mothurOut(level); mothurOutEndLine(); + + ListVector list; + list.setLabel(level); + //go through nodes and build listvector + for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) { + + //get parents + TaxNode node = tree->get(itCurrent->first); + parentNodes[node.parent] = node.parent; + + vector names = node.accessions; + + //make the names compatable with listvector + string name = ""; + for (int i = 0; i < names.size(); i++) { name += names[i] + ","; } + name = name.substr(0, name.length()-1); //rip off extra ',' + + //add bin to list vector + list.push_back(name); + } + + //print listvector + list.print(outList); + //print rabund + list.getRAbundVector().print(outRabund); + //print sabund + list.getSAbundVector().print(outSabund); + + labels.erase(level); + + }else { + + //just get parents + for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) { + int parent = tree->get(itCurrent->first).parent; + parentNodes[parent] = parent; + } + } + + //move up a level + currentNodes = parentNodes; + parentNodes.clear(); + + //have we reached the rootnode + if (tree->get(currentNodes.begin()->first).parent == -1) { done = true; } + } + + outList.close(); + outSabund.close(); + outRabund.close(); + + delete tree; + + return 0; + } + + catch(exception& e) { + errorOut(e, "PhylotypeCommand", "execute"); + exit(1); + } +} +/**********************************************************************************************************************/ diff --git a/phylotypecommand.h b/phylotypecommand.h new file mode 100644 index 0000000..0322daf --- /dev/null +++ b/phylotypecommand.h @@ -0,0 +1,46 @@ +#ifndef PHYLOTYPECOMMAND_H +#define PHYLOTYPECOMMAND_H + + +/* + * phylotypecommand.h + * Mothur + * + * Created by westcott on 11/20/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "taxonomyequalizer.h" +#include "command.hpp" + +/*************************************************************************/ + +class PhylotypeCommand : public Command { + +public: + PhylotypeCommand(string); + ~PhylotypeCommand(); + int execute(); + void help(); + +private: + bool abort, allLines; + string taxonomyFileName, label; + set labels; //holds labels to be used + int cutoff; + + map currentNodes; + map parentNodes; + map::iterator itCurrent; + +}; + + +/*************************************************************************/ + + +#endif + + + diff --git a/taxonomyequalizer.cpp b/taxonomyequalizer.cpp new file mode 100644 index 0000000..37b7720 --- /dev/null +++ b/taxonomyequalizer.cpp @@ -0,0 +1,169 @@ +/* + * taxonomyequalizer.cpp + * Mothur + * + * Created by westcott on 11/20/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "taxonomyequalizer.h" + +/**************************************************************************************************/ +TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) { + try { + containsConfidence = false; + + ifstream inTax; + openInputFile(tfile, inTax); + + int highestLevel = getHighestLevel(inTax); + + //if the user has specified a cutoff and it's smaller than the highest level + if ((cutoff != -1) && (cutoff < highestLevel)) { + highestLevel = cutoff; + }else if (cutoff > highestLevel) { + mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel)); + mothurOutEndLine(); + } + + inTax.close(); + openInputFile(tfile, inTax); + + ofstream out; + equalizedFile = getRootName(tfile) + "equalized.taxonomy"; + openOutputFile(equalizedFile, out); + + string name, tax; + while (inTax) { + inTax >> name >> tax; gobble(inTax); + + if (containsConfidence) { removeConfidences(tax); } + + //is this a taxonomy that needs to be extended? + if (seqLevels[name] < highestLevel) { + extendTaxonomy(name, tax, highestLevel); + }else if (seqLevels[name] > highestLevel) { //this can happen if hte user enters a cutoff + truncateTaxonomy(name, tax, highestLevel); + } + + out << name << '\t' << tax << endl; + } + + inTax.close(); + out.close(); + + } + catch(exception& e) { + errorOut(e, "TaxEqualizer", "TaxEqualizer"); + exit(1); + } +} +/**************************************************************************************************/ +int TaxEqualizer::getHighestLevel(ifstream& in) { + try { + + int level = 0; + string name, tax; + + while (in) { + in >> name >> tax; gobble(in); + + //count levels in this taxonomy + int thisLevel = 0; + for (int i = 0; i < tax.length(); i++) { + if (tax[i] == ';') { thisLevel++; } + } + + //save sequences level + seqLevels[name] = thisLevel; + + //is this the longest taxonomy? + if (thisLevel > level) { + level = thisLevel; + testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out + } + } + + int pos = testTax.find_first_of('('); + + //if there are '(' then there are confidences we need to take out + if (pos != -1) { containsConfidence = true; } + + return level; + + } + catch(exception& e) { + errorOut(e, "TaxEqualizer", "getHighestLevel"); + exit(1); + } +} +/**************************************************************************************************/ +void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) { + try { + + //get last taxon + tax = tax.substr(0, tax.length()-1); //take off final ";" + int pos = tax.find_last_of(';'); + string lastTaxon = tax.substr(pos+1); + lastTaxon += ";"; //add back on delimiting char + tax += ";"; + + int currentLevel = seqLevels[name]; + + //added last taxon until you get desired level + for (int i = currentLevel; i < desiredLevel; i++) { + tax += lastTaxon; + } + } + catch(exception& e) { + errorOut(e, "TaxEqualizer", "extendTaxonomy"); + exit(1); + } +} +/**************************************************************************************************/ +void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) { + try { + + int currentLevel = seqLevels[name]; + tax = tax.substr(0, tax.length()-1); //take off final ";" + + //remove a taxon until you get to desired level + for (int i = currentLevel; i > desiredLevel; i--) { + tax = tax.substr(0, tax.find_last_of(';')); + } + + tax += ";"; + } + catch(exception& e) { + errorOut(e, "TaxEqualizer", "truncateTaxonomy"); + exit(1); + } +} +/**************************************************************************************************/ +void TaxEqualizer::removeConfidences(string& tax) { + try { + + string taxon; + string newTax = ""; + + while (tax.find_first_of(';') != -1) { + //get taxon + taxon = tax.substr(0,tax.find_first_of(';')); + taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence + taxon += ";"; + + tax = tax.substr(tax.find_first_of(';')+1, tax.length()); + newTax += taxon; + } + + tax = newTax; + } + catch(exception& e) { + errorOut(e, "TaxEqualizer", "removeConfidences"); + exit(1); + } +} + +/**************************************************************************************************/ + diff --git a/taxonomyequalizer.h b/taxonomyequalizer.h new file mode 100644 index 0000000..6308d71 --- /dev/null +++ b/taxonomyequalizer.h @@ -0,0 +1,50 @@ +#ifndef TAXONOMYEQUALIZER_H +#define TAXONOMYEQUALIZER_H + + +/* + * taxonomyequalizer.h + * Mothur + * + * Created by westcott on 11/20/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "mothur.h" + +//reads in taxonomy file and makes all the taxonomies the same length +//by appending the last taxon to a given taxonomy as many times as needed to +//make it as long as the longest taxonomy in the file + +/**************************************************************************************************/ + +class TaxEqualizer { + +public: + TaxEqualizer(string, int); + ~TaxEqualizer() {}; + + string getEqualizedTaxFile() { return equalizedFile; } + + +private: + string equalizedFile, testTax; + bool containsConfidence; + int cutoff; + map seqLevels; //maps name to level of taxonomy + + int getHighestLevel(ifstream&); //scans taxonomy file to find taxonomy with highest level + void extendTaxonomy(string, string&, int); //name, taxonomy, desired level + void truncateTaxonomy(string, string&, int); //name, taxonomy, desired level + void removeConfidences(string&); //removes the confidence limits on the taxon + + +}; + +/**************************************************************************************************/ + + +#endif + + -- 2.39.2