]> git.donarmstrong.com Git - mothur.git/blob - taxonomyequalizer.cpp
working on pam
[mothur.git] / taxonomyequalizer.cpp
1 /*
2  *  taxonomyequalizer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/20/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "taxonomyequalizer.h"
11
12 /**************************************************************************************************/
13 TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir(o) {
14         try {
15                 m = MothurOut::getInstance();
16                 containsConfidence = false;
17                 
18                 ifstream inTax;
19                 m->openInputFile(tfile, inTax);
20         
21                 highestLevel = getHighestLevel(inTax);
22                 
23                 if (!m->control_pressed) { 
24                         
25                         //if the user has specified a cutoff and it's smaller than the highest level
26                         if ((cutoff != -1) && (cutoff < highestLevel)) { 
27                                 highestLevel = cutoff;
28                         }else if (cutoff > highestLevel) {
29                                 m->mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". I will set the cutoff to " + toString(highestLevel));
30                                 m->mothurOutEndLine();
31                         }
32                         
33                         inTax.close(); 
34                         ifstream in; 
35                         m->openInputFile(tfile, in);
36                         
37                         ofstream out;
38                         equalizedFile = outputDir + m->getRootName(m->getSimpleName(tfile)) + "equalized.taxonomy";
39                         m->openOutputFile(equalizedFile, out);
40                         
41         
42                         string name, tax;
43                         while (in) {
44                         
45                                 if (m->control_pressed) {  break; }
46                                 
47                                 in >> name >> tax;   m->gobble(in);
48                                 
49                                 if (containsConfidence) {  m->removeConfidences(tax);   }
50                                 
51                                 //is this a taxonomy that needs to be extended?
52                                 if (seqLevels[name] < highestLevel) {
53                                         extendTaxonomy(name, tax, highestLevel);
54                                 }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff
55                                         truncateTaxonomy(name, tax, highestLevel);
56                                 }
57                                 
58                                 out << name << '\t' << tax << endl;
59                         }
60                         
61                         in.close();
62                         out.close();
63                         
64                         if (m->control_pressed) { m->mothurRemove(equalizedFile);  }
65                 }else { inTax.close(); }
66                 
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "TaxEqualizer", "TaxEqualizer");
70                 exit(1);
71         }
72 }
73 /**************************************************************************************************/
74 int TaxEqualizer::getHighestLevel(ifstream& in) {
75         try {
76                 
77                 int level = 0;
78                 string name, tax;
79                 
80                 while (in) {
81                         in >> name >> tax;   m->gobble(in);
82                 
83                         //count levels in this taxonomy
84                         int thisLevel = 0;
85                         for (int i = 0; i < tax.length(); i++) {
86                                 if (tax[i] == ';') {  thisLevel++;      }
87                         }
88                 
89                         //save sequences level
90                         seqLevels[name] = thisLevel;
91         
92                         //is this the longest taxonomy?
93                         if (thisLevel > level) {  
94                                 level = thisLevel;  
95                                 testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out
96                         } 
97                 }
98                 
99                 int pos = testTax.find_first_of('(');
100
101                 //if there are '(' then there are confidences we need to take out
102                 if (pos != -1) {  containsConfidence = true;  }
103         
104                 return level;
105                                         
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TaxEqualizer", "getHighestLevel");
109                 exit(1);
110         }
111 }
112 /**************************************************************************************************/
113 void TaxEqualizer::extendTaxonomy(string name, string& tax, int desiredLevel) {
114         try {
115                         
116                 //get last taxon
117                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
118                 int pos = tax.find_last_of(';');
119                 string lastTaxon = tax.substr(pos+1);  
120                 lastTaxon += ";"; //add back on delimiting char
121                 tax += ";";
122                 
123                 int currentLevel = seqLevels[name];
124                 
125                 //added last taxon until you get desired level
126                 for (int i = currentLevel; i < desiredLevel; i++) {
127                         tax += lastTaxon;
128                 }
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "TaxEqualizer", "extendTaxonomy");
132                 exit(1);
133         }
134 }
135 /**************************************************************************************************/
136 void TaxEqualizer::truncateTaxonomy(string name, string& tax, int desiredLevel) {
137         try {
138                         
139                 int currentLevel = seqLevels[name];
140                 tax = tax.substr(0, tax.length()-1);  //take off final ";"
141                 
142                 //remove a taxon until you get to desired level
143                 for (int i = currentLevel; i > desiredLevel; i--) {
144                         tax = tax.substr(0,  tax.find_last_of(';'));
145                 }
146                 
147                 tax += ";";
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "TaxEqualizer", "truncateTaxonomy");
151                 exit(1);
152         }
153 }
154 /**************************************************************************************************/
155