2 * taxonomyequalizer.cpp
5 * Created by westcott on 11/20/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "taxonomyequalizer.h"
12 /**************************************************************************************************/
// Builds an "equalized" copy of the taxonomy file `tfile`, in which every
// sequence's taxonomy is brought to the same number of levels.
//   tfile - path of the input taxonomy file; each record is read as a
//           whitespace-separated "name taxonomy" pair.
//   c     - level cutoff, stored in `cutoff`; -1 is treated specially below
//           (presumably "no cutoff given" - confirm against the header/caller).
// Output is written to getRootName(tfile) + "equalized.taxonomy", one
// "name<TAB>taxonomy" record per line.
// NOTE(review): this extract appears to be missing lines (the try block, the
// loop header around the rewrite pass, local declarations, stream closes and
// closing braces are not visible) - confirm details against the full file.
13 TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) {
// Start by assuming there are no confidence scores; getHighestLevel() flips this
// flag when it finds a '(' in the deepest taxonomy.
15 containsConfidence = false;
18 openInputFile(tfile, inTax);
// First pass over the file: determines the deepest taxonomy and, as a side
// effect, records every sequence's level count in seqLevels.
20 highestLevel = getHighestLevel(inTax);
22 //if the user has specified a cutoff and it's smaller than the highest level
23 if ((cutoff != -1) && (cutoff < highestLevel)) {
24 highestLevel = cutoff;
25 }else if (cutoff > highestLevel) {
// The cutoff is deeper than anything in the file, so highestLevel is kept.
// NOTE(review): the message says the cutoff will be set to highestLevel, but no
// assignment to `cutoff` is visible in this extract - verify.
26 mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
// Second pass: the file is opened again so the records can be re-read from the
// start (presumably the stream is closed on a line not visible here).
31 openInputFile(tfile, inTax);
34 equalizedFile = getRootName(tfile) + "equalized.taxonomy";
35 openOutputFile(equalizedFile, out);
// Read one "name taxonomy" record; gobble() presumably skips trailing
// whitespace/newlines - confirm against its definition.
39 inTax >> name >> tax; gobble(inTax);
// Strip "(...)" confidence annotations before adjusting the number of levels.
41 if (containsConfidence) { removeConfidences(tax); }
43 //is this a taxonomy that needs to be extended?
44 if (seqLevels[name] < highestLevel) {
45 extendTaxonomy(name, tax, highestLevel);
46 }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
47 truncateTaxonomy(name, tax, highestLevel);
// Taxonomies already at highestLevel are written through unchanged.
50 out << name << '\t' << tax << endl;
// Exceptions are reported through errorOut with the class and method name.
58 errorOut(e, "TaxEqualizer", "TaxEqualizer");
62 /**************************************************************************************************/
// Scans the taxonomy records available on `in` and finds the deepest taxonomy.
// The level of a taxonomy is the number of ';' characters it contains.
// Side effects:
//   - seqLevels[name] is set to the level count of every record read;
//   - testTax holds a taxonomy that exceeded the running maximum;
//   - containsConfidence is set to true when testTax contains a '('.
// Returns an int that the constructor stores in highestLevel.
// NOTE(review): the loop header, the update of `level` and the return statement
// are not visible in this extract - presumably `level` is returned; confirm.
63 int TaxEqualizer::getHighestLevel(ifstream& in) {
// Read one whitespace-separated "name taxonomy" record.
70 in >> name >> tax; gobble(in);
72 //count levels in this taxonomy
74 for (int i = 0; i < tax.length(); i++) {
75 if (tax[i] == ';') { thisLevel++; }
78 //save sequences level
79 seqLevels[name] = thisLevel;
81 //is this the longest taxonomy?
82 if (thisLevel > level) {
84 testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
// find_first_of returns std::string::npos when there is no '('; the -1 test below
// relies on npos becoming -1 once it is stored in an int.
88 int pos = testTax.find_first_of('(');
90 //if there are '(' then there are confidences we need to take out
91 if (pos != -1) { containsConfidence = true; }
// Exceptions are reported through errorOut with the class and method name.
97 errorOut(e, "TaxEqualizer", "getHighestLevel");
101 /**************************************************************************************************/
// Pads a taxonomy that has fewer than `desiredLevel` levels by repeating its
// last taxon (per the loop comment below; the loop body itself is not visible
// in this extract).
//   name         - sequence name, used to look up its current level in seqLevels.
//   tax          - taxonomy string, modified in place; assumed to be non-empty
//                  and to end with ';' - TODO confirm against callers.
//   desiredLevel - number of levels the taxonomy should have afterwards.
102 void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
// Drop the trailing delimiter so that find_last_of locates the ';' in front of
// the last taxon rather than the one that terminates the string.
106 tax = tax.substr(0, tax.length()-1); //take off final ";"
// For a single-taxon taxonomy no ';' remains: find_last_of yields npos, which is
// expected to become -1 in the int, so pos+1 == 0 and lastTaxon is the whole string.
107 int pos = tax.find_last_of(';');
108 string lastTaxon = tax.substr(pos+1);
109 lastTaxon += ";"; //add back on delimiting char
// NOTE(review): at this point `tax` has lost its trailing ';' in the visible
// code; a statement restoring it appears to be missing from this extract - verify.
// seqLevels[name] holds the ';' count recorded by getHighestLevel().
112 int currentLevel = seqLevels[name];
114 //added last taxon until you get desired level
115 for (int i = currentLevel; i < desiredLevel; i++) {
119 catch(exception& e) {
120 errorOut(e, "TaxEqualizer", "extendTaxonomy");
124 /**************************************************************************************************/
// Shortens a taxonomy that has more than `desiredLevel` levels by dropping
// trailing taxa one at a time.
//   name         - sequence name, used to look up its current level in seqLevels.
//   tax          - taxonomy string, modified in place; assumed to end with ';'
//                  - TODO confirm against callers.
//   desiredLevel - number of levels to keep.
// NOTE(review): no statement re-appending the trailing ';' is visible in this
// extract - presumably it sits on a missing line after the loop; verify.
125 void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
// seqLevels[name] holds the ';' count recorded by getHighestLevel().
128 int currentLevel = seqLevels[name];
129 tax = tax.substr(0, tax.length()-1); //take off final ";"
131 //remove a taxon until you get to desired level
132 for (int i = currentLevel; i > desiredLevel; i--) {
// Cut at the last remaining ';', removing the final taxon together with the
// delimiter in front of it.
133 tax = tax.substr(0, tax.find_last_of(';'));
138 catch(exception& e) {
139 errorOut(e, "TaxEqualizer", "truncateTaxonomy");
143 /**************************************************************************************************/
// Removes confidence annotations - everything from the first '(' onward - from
// each ';'-delimited taxon of `tax`.
//   tax - taxonomy string, passed by reference and consumed taxon by taxon.
// NOTE(review): the statements that collect the cleaned taxa and store the result
// back into `tax` are not visible in this extract - verify against the full file.
144 void TaxEqualizer::removeConfidences(string& tax) {
// Loop while a ';' remains; the comparison works because -1 converts to
// std::string::npos in the unsigned comparison.
150 while (tax.find_first_of(';') != -1) {
// Next taxon = text in front of the first ';'.
152 taxon = tax.substr(0,tax.find_first_of(';'));
// If the taxon has no '(', find_first_of returns npos and substr keeps it whole.
153 taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
// Drop the processed taxon and its ';'. The length argument is larger than what
// remains, so substr simply runs to the end of the string.
156 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
162 catch(exception& e) {
163 errorOut(e, "TaxEqualizer", "removeConfidences");
168 /**************************************************************************************************/