]> git.donarmstrong.com Git - mothur.git/blobdiff - taxonomyequalizer.cpp
Revert to previous commit
[mothur.git] / taxonomyequalizer.cpp
diff --git a/taxonomyequalizer.cpp b/taxonomyequalizer.cpp
new file mode 100644 (file)
index 0000000..c050726
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ *  taxonomyequalizer.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/20/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "taxonomyequalizer.h"
+
+/**************************************************************************************************/
+TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir(o) {
+       try {
+               m = MothurOut::getInstance();
+               containsConfidence = false;
+               
+               ifstream inTax;
+               m->openInputFile(tfile, inTax);
+       
+               highestLevel = getHighestLevel(inTax);
+               
+               if (!m->control_pressed) { 
+                       
+                       //if the user has specified a cutoff and it's smaller than the highest level
+                       if ((cutoff != -1) && (cutoff < highestLevel)) { 
+                               highestLevel = cutoff;
+                       }else if (cutoff > highestLevel) {
+                               m->mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
+                               m->mothurOutEndLine();
+                       }
+                       
+                       inTax.close(); 
+                       ifstream in; 
+                       m->openInputFile(tfile, in);
+                       
+                       ofstream out;
+                       equalizedFile = outputDir + m->getRootName(m->getSimpleName(tfile)) + "equalized.taxonomy";
+                       m->openOutputFile(equalizedFile, out);
+                       
+       
+                       string name, tax;
+                       while (in) {
+                       
+                               if (m->control_pressed) {  break; }
+                               
+                               in >> name >> tax;   m->gobble(in);
+                               
+                               if (containsConfidence) {  m->removeConfidences(tax);   }
+                               
+                               //is this a taxonomy that needs to be extended?
+                               if (seqLevels[name] < highestLevel) {
+                                       extendTaxonomy(name, tax, highestLevel);
+                               }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
+                                       truncateTaxonomy(name, tax, highestLevel);
+                               }
+                               
+                               out << name << '\t' << tax << endl;
+                       }
+                       
+                       in.close();
+                       out.close();
+                       
+                       if (m->control_pressed) { m->mothurRemove(equalizedFile);  }
+               }else { inTax.close(); }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TaxEqualizer", "TaxEqualizer");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int TaxEqualizer::getHighestLevel(ifstream& in) {
+       try {
+               
+               int level = 0;
+               string name, tax;
+               
+               while (in) {
+                       in >> name >> tax;   m->gobble(in);
+               
+                       //count levels in this taxonomy
+                       int thisLevel = 0;
+                       for (int i = 0; i < tax.length(); i++) {
+                               if (tax[i] == ';') {  thisLevel++;      }
+                       }
+               
+                       //save sequences level
+                       seqLevels[name] = thisLevel;
+       
+                       //is this the longest taxonomy?
+                       if (thisLevel > level) {  
+                               level = thisLevel;  
+                               testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
+                       } 
+               }
+               
+               int pos = testTax.find_first_of('(');
+
+               //if there are '(' then there are confidences we need to take out
+               if (pos != -1) {  containsConfidence = true;  }
+       
+               return level;
+                                       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TaxEqualizer", "getHighestLevel");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
+       try {
+                       
+               //get last taxon
+               tax = tax.substr(0, tax.length()-1);  //take off final ";"
+               int pos = tax.find_last_of(';');
+               string lastTaxon = tax.substr(pos+1);  
+               lastTaxon += ";"; //add back on delimiting char
+               tax += ";";
+               
+               int currentLevel = seqLevels[name];
+               
+               //added last taxon until you get desired level
+               for (int i = currentLevel; i < desiredLevel; i++) {
+                       tax += lastTaxon;
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TaxEqualizer", "extendTaxonomy");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
+       try {
+                       
+               int currentLevel = seqLevels[name];
+               tax = tax.substr(0, tax.length()-1);  //take off final ";"
+               
+               //remove a taxon until you get to desired level
+               for (int i = currentLevel; i > desiredLevel; i--) {
+                       tax = tax.substr(0,  tax.find_last_of(';'));
+               }
+               
+               tax += ";";
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TaxEqualizer", "truncateTaxonomy");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+