]> git.donarmstrong.com Git - mothur.git/blob - sequence.cpp
changed sequence class so that when constructor is called aligned and unaligned value...
[mothur.git] / sequence.cpp
1 /*
2  *  sequence.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "sequence.hpp"
11
12 /***********************************************************************/
13
14 Sequence::Sequence(){
15         initialize();
16 }
17
18 /***********************************************************************/
19
20 Sequence::Sequence(string newName, string sequence) {
21
22         initialize();   
23         name = newName;
24         
25         //setUnaligned removes any gap characters for us
26         setUnaligned(sequence);
27         setAligned(sequence);
28         
29 }
30 //********************************************************************************************************************
31
32 Sequence::Sequence(ifstream& fastaFile){
33
34         initialize();
35         fastaFile >> name;
36         name = name.substr(1);
37         
38         while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
39
40         char letter;
41         string sequence;
42         
43         while(fastaFile){
44                 letter= fastaFile.get();
45                 if(letter == '>'){
46                         fastaFile.putback(letter);
47                         break;
48                 }
49                 else if(isprint(letter)){
50                         letter = toupper(letter);
51                         if(letter == 'U'){letter = 'T';}
52                         sequence += letter;
53                 }
54         }
55
56         setAligned(sequence);   
57         //setUnaligned removes any gap characters for us                                                
58         setUnaligned(sequence);                                                         
59 }
60
61 //********************************************************************************************************************
62
63 void Sequence::initialize(){
64         
65         name = "";
66         unaligned = "";
67         aligned = "";
68         pairwise = "";
69         
70         numBases = 0;
71         alignmentLength = 0;
72         isAligned = 0;
73         startPos = -1;
74         endPos = -1;
75         longHomoPolymer = -1;
76         ambigBases = -1;
77         
78 }       
79
80 //********************************************************************************************************************
81
82 void Sequence::setName(string seqName) {
83         if(seqName[0] == '>')   {       name = seqName.substr(1);       }
84         else                                    {       name = seqName;                         }
85 }
86
87 //********************************************************************************************************************
88
89 void Sequence::setUnaligned(string sequence){
90         
91         if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) {
92                 string temp = "";
93                 for(int j=0;j<sequence.length();j++) {
94                         if(isalpha(sequence[j]))        {       temp += sequence[j];    }
95                 }
96                 unaligned = temp;
97         }
98         else {
99                 unaligned = sequence;
100         }
101         numBases = unaligned.length();
102         
103 }
104
105 //********************************************************************************************************************
106
107 void Sequence::setAligned(string sequence){
108         
109         //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
110         aligned = sequence;
111         alignmentLength = aligned.length();
112
113         if(aligned[0] == '-'){
114                 for(int i=0;i<alignmentLength;i++){
115                         if(aligned[i] == '-'){
116                                 aligned[i] = '.';
117                         }
118                         else{
119                                 break;
120                         }
121                 }
122                 for(int i=alignmentLength-1;i>=0;i--){
123                         if(aligned[i] == '-'){
124                                 aligned[i] = '.';
125                         }
126                         else{
127                                 break;
128                         }
129                 }
130         }
131         isAligned = 1;  
132 }
133
134 //********************************************************************************************************************
135
136 void Sequence::setPairwise(string sequence){
137         pairwise = sequence;
138 }
139
140 //********************************************************************************************************************
141
142 string Sequence::convert2ints() {
143         
144         if(unaligned == "")     {       /* need to throw an error */    }
145         
146         string processed;
147         
148         for(int i=0;i<unaligned.length();i++) {
149                 if(toupper(unaligned[i]) == 'A')                {       processed += '0';       }
150                 else if(toupper(unaligned[i]) == 'C')   {       processed += '1';       }
151                 else if(toupper(unaligned[i]) == 'G')   {       processed += '2';       }
152                 else if(toupper(unaligned[i]) == 'T')   {       processed += '3';       }
153                 else if(toupper(unaligned[i]) == 'U')   {       processed += '3';       }
154                 else                                                                    {       processed += '4';       }
155         }
156         return processed;
157 }
158
159 //********************************************************************************************************************
160
161 string Sequence::getName(){
162         return name;
163 }
164
165 //********************************************************************************************************************
166
167 string Sequence::getAligned(){
168         return aligned;
169 }
170
171 //********************************************************************************************************************
172
173 string Sequence::getPairwise(){
174         return pairwise;
175 }
176
177 //********************************************************************************************************************
178
179 string Sequence::getUnaligned(){
180         return unaligned;
181 }
182
183 //********************************************************************************************************************
184
185 int Sequence::getNumBases(){
186         return numBases;
187 }
188
189 //********************************************************************************************************************
190
191 void Sequence::printSequence(ostream& out){
192
193         out << ">" << name << endl;
194         if(isAligned){
195                 out << aligned << endl;
196         }
197         else{
198                 out << unaligned << endl;
199         }
200 }
201
202 //********************************************************************************************************************
203
204 int Sequence::getAlignLength(){
205         return alignmentLength;
206 }
207
208 //********************************************************************************************************************
209
210 int Sequence::getAmbigBases(){
211         if(ambigBases == -1){
212                 ambigBases = 0;
213                 for(int j=0;j<numBases;j++){
214                         if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
215                                 ambigBases++;
216                         }
217                 }
218         }       
219         
220         return ambigBases;
221 }
222
223 //********************************************************************************************************************
224
225 int Sequence::getLongHomoPolymer(){
226         if(longHomoPolymer == -1){
227                 longHomoPolymer = 1;
228                 int homoPolymer = 1;
229                 for(int j=1;j<numBases;j++){
230                         if(unaligned[j] == unaligned[j-1]){
231                                 homoPolymer++;
232                         }
233                         else{
234                                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
235                                 homoPolymer = 1;
236                         }
237                 }
238                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
239         }
240         return longHomoPolymer;
241 }
242
243 //********************************************************************************************************************
244
245 int Sequence::getStartPos(){
246         if(endPos == -1){
247                 for(int j = 0; j < alignmentLength; j++) {
248                         if(aligned[j] != '.'){
249                                 startPos = j + 1;
250                                 break;
251                         }
252                 }
253         }
254         if(isAligned == 0){     startPos = 1;   }
255
256         return startPos;
257 }
258
259 //********************************************************************************************************************
260
261 int Sequence::getEndPos(){
262         if(endPos == -1){
263                 for(int j=alignmentLength-1;j>=0;j--){
264                         if(aligned[j] != '.'){
265                                 endPos = j + 1;
266                                 break;
267                         }
268                 }
269         }
270         if(isAligned == 0){     endPos = numBases;      }
271         
272         return endPos;
273 }
274
275 //********************************************************************************************************************
276
277 bool Sequence::getIsAligned(){
278         return isAligned;
279 }
280
281 //********************************************************************************************************************
282
283 void Sequence::reverseComplement(){
284
285         string temp;
286         for(int i=numBases-1;i>=0;i--){
287                 if(unaligned[i] == 'A')         {       temp += 'T';    }
288                 else if(unaligned[i] == 'T'){   temp += 'A';    }
289                 else if(unaligned[i] == 'G'){   temp += 'C';    }
290                 else if(unaligned[i] == 'C'){   temp += 'G';    }
291                 else                                            {       temp += 'N';    }
292         }
293         unaligned = temp;
294         
295 }
296
297 //********************************************************************************************************************