]> git.donarmstrong.com Git - mothur.git/blob - taxonomyequalizer.cpp
changed label outputs for phylotype command
[mothur.git] / taxonomyequalizer.cpp
1 /*
2  *  taxonomyequalizer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/20/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "taxonomyequalizer.h"
11
12 /**************************************************************************************************/
13 TaxEqualizer::TaxEqualizer(string tfile, int c) : cutoff(c) {
14         try {
15                 containsConfidence = false;
16                 
17                 ifstream inTax;
18                 openInputFile(tfile, inTax);
19         
20                 highestLevel = getHighestLevel(inTax);
21         
22                 //if the user has specified a cutoff and it's smaller than the highest level
23                 if ((cutoff != -1) && (cutoff < highestLevel)) { 
24                         highestLevel = cutoff;
25                 }else if (cutoff > highestLevel) {
26                         mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
27                         mothurOutEndLine();
28                 }
29                 
30                 inTax.close();  
31                 openInputFile(tfile, inTax);
32                 
33                 ofstream out;
34                 equalizedFile = getRootName(tfile) + "equalized.taxonomy";
35                 openOutputFile(equalizedFile, out);
36                 
37                 string name, tax;
38                 while (inTax) {
39                         inTax >> name >> tax;   gobble(inTax);
40                         
41                         if (containsConfidence) {  removeConfidences(tax);      }
42                         
43                         //is this a taxonomy that needs to be extended?
44                         if (seqLevels[name] < highestLevel) {
45                                 extendTaxonomy(name, tax, highestLevel);
46                         }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
47                                 truncateTaxonomy(name, tax, highestLevel);
48                         }
49                         
50                         out << name << '\t' << tax << endl;
51                 }
52                 
53                 inTax.close();
54                 out.close();
55                                         
56         }
57         catch(exception& e) {
58                 errorOut(e, "TaxEqualizer", "TaxEqualizer");
59                 exit(1);
60         }
61 }
62 /**************************************************************************************************/
63 int TaxEqualizer::getHighestLevel(ifstream& in) {
64         try {
65                 
66                 int level = 0;
67                 string name, tax;
68                 
69                 while (in) {
70                         in >> name >> tax;   gobble(in);
71                 
72                         //count levels in this taxonomy
73                         int thisLevel = 0;
74                         for (int i = 0; i < tax.length(); i++) {
75                                 if (tax[i] == ';') {  thisLevel++;      }
76                         }
77                 
78                         //save sequences level
79                         seqLevels[name] = thisLevel;
80                 
81                         //is this the longest taxonomy?
82                         if (thisLevel > level) {  
83                                 level = thisLevel;  
84                                 testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
85                         }  
86                 }
87                 
88                 int pos = testTax.find_first_of('(');
89
90                 //if there are '(' then there are confidences we need to take out
91                 if (pos != -1) {  containsConfidence = true;  }
92                 
93                 return level;
94                                         
95         }
96         catch(exception& e) {
97                 errorOut(e, "TaxEqualizer", "getHighestLevel");
98                 exit(1);
99         }
100 }
101 /**************************************************************************************************/
102 void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
103         try {
104                         
105                 //get last taxon
106                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
107                 int pos = tax.find_last_of(';');
108                 string lastTaxon = tax.substr(pos+1);  
109                 lastTaxon += ";"; //add back on delimiting char
110                 tax += ";";
111                 
112                 int currentLevel = seqLevels[name];
113                 
114                 //added last taxon until you get desired level
115                 for (int i = currentLevel; i < desiredLevel; i++) {
116                         tax += lastTaxon;
117                 }
118         }
119         catch(exception& e) {
120                 errorOut(e, "TaxEqualizer", "extendTaxonomy");
121                 exit(1);
122         }
123 }
124 /**************************************************************************************************/
125 void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
126         try {
127                         
128                 int currentLevel = seqLevels[name];
129                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
130                 
131                 //remove a taxon until you get to desired level
132                 for (int i = currentLevel; i > desiredLevel; i--) {
133                         tax = tax.substr(0,  tax.find_last_of(';'));
134                 }
135                 
136                 tax += ";";
137         }
138         catch(exception& e) {
139                 errorOut(e, "TaxEqualizer", "truncateTaxonomy");
140                 exit(1);
141         }
142 }
143 /**************************************************************************************************/
144 void TaxEqualizer::removeConfidences(string& tax) {
145         try {
146                 
147                 string taxon;
148                 string newTax = "";
149                 
150                 while (tax.find_first_of(';') != -1) {
151                         //get taxon
152                         taxon = tax.substr(0,tax.find_first_of(';'));
153                         taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
154                         taxon += ";";
155                         
156                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
157                         newTax += taxon;
158                 }
159                 
160                 tax = newTax;
161         }
162         catch(exception& e) {
163                 errorOut(e, "TaxEqualizer", "removeConfidences");
164                 exit(1);
165         }
166 }
167
168 /**************************************************************************************************/
169