]> git.donarmstrong.com Git - mothur.git/blob - sequence.cpp
fixed a bug in calculating the # of ambig bases
[mothur.git] / sequence.cpp
1 /*
2  *  sequence.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include "sequence.hpp"
13
14 /***********************************************************************/
15
16 Sequence::Sequence(){
17         initialize();
18 }
19
20 /***********************************************************************/
21
22 Sequence::Sequence(string newName, string sequence) {
23
24         initialize();   
25         name = newName;
26         if(sequence.find_first_of('-') != string::npos) {
27                 setAligned(sequence);
28                 isAligned = 1;
29         }
30         setUnaligned(sequence);
31         
32 }
33 //********************************************************************************************************************
34
35 Sequence::Sequence(ifstream& fastaFile){
36         
37         string accession;                               //      provided a file handle to a fasta-formatted sequence file, read in the next
38         fastaFile >> accession;                 //      accession number and sequence we find...
39         setName(accession);
40
41         char letter;
42         string sequence;
43         
44         while(fastaFile){
45                 letter= fastaFile.get();
46                 if(letter == '>'){
47                         fastaFile.putback(letter);
48                         break;
49                 }
50                 else if(isprint(letter)){
51                         letter = toupper(letter);
52                         if(letter == 'U'){letter = 'T';}
53                         sequence += letter;
54                 }
55                 
56         }
57
58         if(sequence.find_first_of('-') != string::npos){        //      if there are any gaps in the sequence, assume that it is
59                 setAligned(sequence);                                                   //      an alignment file
60         }
61         setUnaligned(sequence);                                                         //      also set the unaligned sequence file
62 }
63
64 //********************************************************************************************************************
65
66 void Sequence::initialize(){
67         
68         name = "";
69         unaligned = "";
70         aligned = "";
71         pairwise = "";
72         
73         numBases = 0;
74         alignmentLength = 0;
75         isAligned = 0;
76         startPos = -1;
77         endPos = -1;
78         longHomoPolymer = -1;
79         ambigBases = -1;
80         
81 }       
82
83 //********************************************************************************************************************
84
85 void Sequence::setName(string seqName) {
86         if(seqName[0] == '>')   {       name = seqName.substr(1);       }
87         else                                    {       name = seqName;                         }
88 }
89
90 //********************************************************************************************************************
91
92 void Sequence::setUnaligned(string sequence){
93         
94         if(sequence.find_first_of('-') != string::npos) {
95                 string temp = "";
96                 for(int j=0;j<sequence.length();j++) {
97                         if(isalpha(sequence[j]))        {       temp += sequence[j];    }
98                 }
99                 unaligned = temp;
100         }
101         else {
102                 unaligned = sequence;
103         }
104         numBases = unaligned.length();
105         
106 }
107
108 //********************************************************************************************************************
109
110 void Sequence::setAligned(string sequence){
111         
112         //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
113         aligned = sequence;
114         alignmentLength = aligned.length();
115
116         if(aligned[0] == '-'){
117                 for(int i=0;i<alignmentLength;i++){
118                         if(aligned[i] == '-'){
119                                 aligned[i] = '.';
120                         }
121                         else{
122                                 break;
123                         }
124                 }
125                 for(int i=alignmentLength-1;i>=0;i--){
126                         if(aligned[i] == '-'){
127                                 aligned[i] = '.';
128                         }
129                         else{
130                                 break;
131                         }
132                 }
133         }
134         
135 }
136
137 //********************************************************************************************************************
138
139 void Sequence::setPairwise(string sequence){
140         pairwise = sequence;
141 }
142
143 //********************************************************************************************************************
144
145 string Sequence::convert2ints() {
146         
147         if(unaligned == "")     {       /* need to throw an error */    }
148         
149         string processed;
150         
151         for(int i=0;i<unaligned.length();i++) {
152                 if(toupper(unaligned[i]) == 'A')                {       processed += '0';       }
153                 else if(toupper(unaligned[i]) == 'C')   {       processed += '1';       }
154                 else if(toupper(unaligned[i]) == 'G')   {       processed += '2';       }
155                 else if(toupper(unaligned[i]) == 'T')   {       processed += '3';       }
156                 else if(toupper(unaligned[i]) == 'U')   {       processed += '3';       }
157                 else                                                                    {       processed += '4';       }
158         }
159         return processed;
160 }
161
162 //********************************************************************************************************************
163
164 string Sequence::getName(){
165         return name;
166 }
167
168 //********************************************************************************************************************
169
170 string Sequence::getAligned(){
171         return aligned;
172 }
173
174 //********************************************************************************************************************
175
176 string Sequence::getPairwise(){
177         return pairwise;
178 }
179
180 //********************************************************************************************************************
181
182 string Sequence::getUnaligned(){
183         return unaligned;
184 }
185
186 //********************************************************************************************************************
187
188 int Sequence::getNumBases(){
189         return numBases;
190 }
191
192 //********************************************************************************************************************
193
194 void Sequence::printSequence(ostream& out){
195
196         out << ">" << name << endl;
197         if(isAligned){
198                 out << aligned << endl;
199         }
200         else{
201                 out << unaligned << endl;
202         }
203 }
204
205 //********************************************************************************************************************
206
207 int Sequence::getAlignLength(){
208         return alignmentLength;
209 }
210
211 //********************************************************************************************************************
212
213 int Sequence::getAmbigBases(){
214         if(ambigBases == -1){
215                 ambigBases = 0;
216                 for(int j=0;j<numBases;j++){
217                         if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
218                                 ambigBases++;
219                         }
220                 }
221         }       
222         
223         return ambigBases;
224 }
225
226 //********************************************************************************************************************
227
228 int Sequence::getLongHomoPolymer(){
229         if(longHomoPolymer == -1){
230                 longHomoPolymer = 1;
231                 int homoPolymer = 1;
232                 for(int j=1;j<numBases;j++){
233                         if(unaligned[j] == unaligned[j-1]){
234                                 homoPolymer++;
235                         }
236                         else{
237                                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
238                                 homoPolymer = 1;
239                         }
240                 }
241                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
242         }
243         return longHomoPolymer;
244 }
245
246 //********************************************************************************************************************
247
248 int Sequence::getStartPos(){
249         if(endPos == -1){
250                 for(int j = 0; j < alignmentLength; j++) {
251                         if(aligned[j] != '.'){
252                                 startPos = j;
253                                 break;
254                         }
255                 }
256         }       
257         return startPos;
258 }
259
260 //********************************************************************************************************************
261
262 int Sequence::getEndPos(){
263         if(endPos == -1){
264                 for(int j=alignmentLength-1;j>=0;j--){
265                         if(aligned[j] != '.'){
266                                 endPos = j;
267                                 break;
268                         }
269                 }
270         }
271         return endPos;
272 }
273
274 //********************************************************************************************************************
275
276 bool Sequence::getIsAligned(){
277         return isAligned;
278 }
279
280 //********************************************************************************************************************