]> git.donarmstrong.com Git - mothur.git/blob - taxonomyequalizer.cpp
edited taxonomy equalizer to fix bug with windows
[mothur.git] / taxonomyequalizer.cpp
1 /*
2  *  taxonomyequalizer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/20/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "taxonomyequalizer.h"
11
12 /**************************************************************************************************/
13 TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) {
14         try {
15                 containsConfidence = false;
16                 
17                 ifstream inTax;
18                 openInputFile(tfile, inTax);
19         
20                 highestLevel = getHighestLevel(inTax);
21         
22                 //if the user has specified a cutoff and it's smaller than the highest level
23                 if ((cutoff != -1) && (cutoff < highestLevel)) { 
24                         highestLevel = cutoff;
25                 }else if (cutoff > highestLevel) {
26                         mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
27                         mothurOutEndLine();
28                 }
29                 
30                 inTax.close(); 
31                 ifstream in; 
32                 openInputFile(tfile, in);
33                 
34                 ofstream out;
35                 equalizedFile = getRootName(tfile) + "equalized.taxonomy";
36                 openOutputFile(equalizedFile, out);
37                 
38                 string name, tax;
39                 while (in) {
40                         in >> name >> tax;   gobble(in);
41                         
42                         if (containsConfidence) {  removeConfidences(tax);      }
43                         
44                         //is this a taxonomy that needs to be extended?
45                         if (seqLevels[name] < highestLevel) {
46                                 extendTaxonomy(name, tax, highestLevel);
47                         }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
48                                 truncateTaxonomy(name, tax, highestLevel);
49                         }
50                         
51                         out << name << '\t' << tax << endl;
52                 }
53                 
54                 in.close();
55                 out.close();
56                                         
57         }
58         catch(exception& e) {
59                 errorOut(e, "TaxEqualizer", "TaxEqualizer");
60                 exit(1);
61         }
62 }
63 /**************************************************************************************************/
64 int TaxEqualizer::getHighestLevel(ifstream& in) {
65         try {
66                 
67                 int level = 0;
68                 string name, tax;
69                 
70                 while (in) {
71                         in >> name >> tax;   gobble(in);
72                 
73                         //count levels in this taxonomy
74                         int thisLevel = 0;
75                         for (int i = 0; i < tax.length(); i++) {
76                                 if (tax[i] == ';') {  thisLevel++;      }
77                         }
78                 
79                         //save sequences level
80                         seqLevels[name] = thisLevel;
81                 
82                         //is this the longest taxonomy?
83                         if (thisLevel > level) {  
84                                 level = thisLevel;  
85                                 testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
86                         }  
87                 }
88                 
89                 int pos = testTax.find_first_of('(');
90
91                 //if there are '(' then there are confidences we need to take out
92                 if (pos != -1) {  containsConfidence = true;  }
93                 
94                 return level;
95                                         
96         }
97         catch(exception& e) {
98                 errorOut(e, "TaxEqualizer", "getHighestLevel");
99                 exit(1);
100         }
101 }
102 /**************************************************************************************************/
103 void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
104         try {
105                         
106                 //get last taxon
107                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
108                 int pos = tax.find_last_of(';');
109                 string lastTaxon = tax.substr(pos+1);  
110                 lastTaxon += ";"; //add back on delimiting char
111                 tax += ";";
112                 
113                 int currentLevel = seqLevels[name];
114                 
115                 //added last taxon until you get desired level
116                 for (int i = currentLevel; i < desiredLevel; i++) {
117                         tax += lastTaxon;
118                 }
119         }
120         catch(exception& e) {
121                 errorOut(e, "TaxEqualizer", "extendTaxonomy");
122                 exit(1);
123         }
124 }
125 /**************************************************************************************************/
126 void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
127         try {
128                         
129                 int currentLevel = seqLevels[name];
130                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
131                 
132                 //remove a taxon until you get to desired level
133                 for (int i = currentLevel; i > desiredLevel; i--) {
134                         tax = tax.substr(0,  tax.find_last_of(';'));
135                 }
136                 
137                 tax += ";";
138         }
139         catch(exception& e) {
140                 errorOut(e, "TaxEqualizer", "truncateTaxonomy");
141                 exit(1);
142         }
143 }
144 /**************************************************************************************************/
145 void TaxEqualizer::removeConfidences(string& tax) {
146         try {
147                 
148                 string taxon;
149                 string newTax = "";
150                 
151                 while (tax.find_first_of(';') != -1) {
152                         //get taxon
153                         taxon = tax.substr(0,tax.find_first_of(';'));
154                         taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
155                         taxon += ";";
156                         
157                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
158                         newTax += taxon;
159                 }
160                 
161                 tax = newTax;
162         }
163         catch(exception& e) {
164                 errorOut(e, "TaxEqualizer", "removeConfidences");
165                 exit(1);
166         }
167 }
168
169 /**************************************************************************************************/
170