]> git.donarmstrong.com Git - mothur.git/blob - taxonomyequalizer.cpp
created mothurOut class to handle logfiles
[mothur.git] / taxonomyequalizer.cpp
1 /*
2  *  taxonomyequalizer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/20/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "taxonomyequalizer.h"
11
12 /**************************************************************************************************/
13 TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) {
14         try {
15                 m = MothurOut::getInstance();
16                 containsConfidence = false;
17                 
18                 ifstream inTax;
19                 openInputFile(tfile, inTax);
20         
21                 highestLevel = getHighestLevel(inTax);
22         
23                 //if the user has specified a cutoff and it's smaller than the highest level
24                 if ((cutoff != -1) && (cutoff < highestLevel)) { 
25                         highestLevel = cutoff;
26                 }else if (cutoff > highestLevel) {
27                         m->mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
28                         m->mothurOutEndLine();
29                 }
30                 
31                 inTax.close(); 
32                 ifstream in; 
33                 openInputFile(tfile, in);
34                 
35                 ofstream out;
36                 equalizedFile = getRootName(tfile) + "equalized.taxonomy";
37                 openOutputFile(equalizedFile, out);
38                 
39                 string name, tax;
40                 while (in) {
41                         in >> name >> tax;   gobble(in);
42                         
43                         if (containsConfidence) {  removeConfidences(tax);      }
44                         
45                         //is this a taxonomy that needs to be extended?
46                         if (seqLevels[name] < highestLevel) {
47                                 extendTaxonomy(name, tax, highestLevel);
48                         }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
49                                 truncateTaxonomy(name, tax, highestLevel);
50                         }
51                         
52                         out << name << '\t' << tax << endl;
53                 }
54                 
55                 in.close();
56                 out.close();
57                                         
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "TaxEqualizer", "TaxEqualizer");
61                 exit(1);
62         }
63 }
64 /**************************************************************************************************/
65 int TaxEqualizer::getHighestLevel(ifstream& in) {
66         try {
67                 
68                 int level = 0;
69                 string name, tax;
70                 
71                 while (in) {
72                         in >> name >> tax;   gobble(in);
73                 
74                         //count levels in this taxonomy
75                         int thisLevel = 0;
76                         for (int i = 0; i < tax.length(); i++) {
77                                 if (tax[i] == ';') {  thisLevel++;      }
78                         }
79                 
80                         //save sequences level
81                         seqLevels[name] = thisLevel;
82                 
83                         //is this the longest taxonomy?
84                         if (thisLevel > level) {  
85                                 level = thisLevel;  
86                                 testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
87                         }  
88                 }
89                 
90                 int pos = testTax.find_first_of('(');
91
92                 //if there are '(' then there are confidences we need to take out
93                 if (pos != -1) {  containsConfidence = true;  }
94                 
95                 return level;
96                                         
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "TaxEqualizer", "getHighestLevel");
100                 exit(1);
101         }
102 }
103 /**************************************************************************************************/
104 void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
105         try {
106                         
107                 //get last taxon
108                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
109                 int pos = tax.find_last_of(';');
110                 string lastTaxon = tax.substr(pos+1);  
111                 lastTaxon += ";"; //add back on delimiting char
112                 tax += ";";
113                 
114                 int currentLevel = seqLevels[name];
115                 
116                 //added last taxon until you get desired level
117                 for (int i = currentLevel; i < desiredLevel; i++) {
118                         tax += lastTaxon;
119                 }
120         }
121         catch(exception& e) {
122                 m->errorOut(e, "TaxEqualizer", "extendTaxonomy");
123                 exit(1);
124         }
125 }
126 /**************************************************************************************************/
127 void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
128         try {
129                         
130                 int currentLevel = seqLevels[name];
131                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
132                 
133                 //remove a taxon until you get to desired level
134                 for (int i = currentLevel; i > desiredLevel; i--) {
135                         tax = tax.substr(0,  tax.find_last_of(';'));
136                 }
137                 
138                 tax += ";";
139         }
140         catch(exception& e) {
141                 m->errorOut(e, "TaxEqualizer", "truncateTaxonomy");
142                 exit(1);
143         }
144 }
145 /**************************************************************************************************/
146 void TaxEqualizer::removeConfidences(string& tax) {
147         try {
148                 
149                 string taxon;
150                 string newTax = "";
151                 
152                 while (tax.find_first_of(';') != -1) {
153                         //get taxon
154                         taxon = tax.substr(0,tax.find_first_of(';'));
155                         taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
156                         taxon += ";";
157                         
158                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
159                         newTax += taxon;
160                 }
161                 
162                 tax = newTax;
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "TaxEqualizer", "removeConfidences");
166                 exit(1);
167         }
168 }
169
170 /**************************************************************************************************/
171