]> git.donarmstrong.com Git - mothur.git/blob - sequence.cpp
fixed some bugs
[mothur.git] / sequence.cpp
1 /*
2  *  sequence.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "sequence.hpp"
11
12 /***********************************************************************/
13
14 Sequence::Sequence(){
15         initialize();
16 }
17
18 /***********************************************************************/
19
20 Sequence::Sequence(string newName, string sequence) {
21
22         initialize();   
23         name = newName;
24         if(sequence.find_first_of('-') != string::npos) {
25                 setAligned(sequence);
26         }
27         setUnaligned(sequence);
28         
29 }
30 //********************************************************************************************************************
31
32 Sequence::Sequence(ifstream& fastaFile){
33         initialize();
34         
35         string accession;                               //      provided a file handle to a fasta-formatted sequence file, read in the next
36         fastaFile >> accession;                 //      accession number and sequence we find...
37         setName(accession);
38
39         char letter;
40         string sequence;
41         
42         while(fastaFile){
43                 letter= fastaFile.get();
44                 if(letter == '>'){
45                         fastaFile.putback(letter);
46                         break;
47                 }
48                 else if(isprint(letter)){
49                         letter = toupper(letter);
50                         if(letter == 'U'){letter = 'T';}
51                         sequence += letter;
52                 }
53                 
54         }
55
56         if(sequence.find_first_of('-') != string::npos){        //      if there are any gaps in the sequence, assume that it is
57                 setAligned(sequence);                                                   //      an alignment file
58         }
59         setUnaligned(sequence);                                                         //      also set the unaligned sequence file
60 }
61
62 //********************************************************************************************************************
63
64 void Sequence::initialize(){
65         
66         name = "";
67         unaligned = "";
68         aligned = "";
69         pairwise = "";
70         
71         numBases = 0;
72         alignmentLength = 0;
73         isAligned = 0;
74         startPos = -1;
75         endPos = -1;
76         longHomoPolymer = -1;
77         ambigBases = -1;
78         
79 }       
80
81 //********************************************************************************************************************
82
83 void Sequence::setName(string seqName) {
84         if(seqName[0] == '>')   {       name = seqName.substr(1);       }
85         else                                    {       name = seqName;                         }
86 }
87
88 //********************************************************************************************************************
89
90 void Sequence::setUnaligned(string sequence){
91         
92         if(sequence.find_first_of('-') != string::npos) {
93                 string temp = "";
94                 for(int j=0;j<sequence.length();j++) {
95                         if(isalpha(sequence[j]))        {       temp += sequence[j];    }
96                 }
97                 unaligned = temp;
98         }
99         else {
100                 unaligned = sequence;
101         }
102         numBases = unaligned.length();
103         
104 }
105
106 //********************************************************************************************************************
107
108 void Sequence::setAligned(string sequence){
109         
110         //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
111         aligned = sequence;
112         alignmentLength = aligned.length();
113
114         if(aligned[0] == '-'){
115                 for(int i=0;i<alignmentLength;i++){
116                         if(aligned[i] == '-'){
117                                 aligned[i] = '.';
118                         }
119                         else{
120                                 break;
121                         }
122                 }
123                 for(int i=alignmentLength-1;i>=0;i--){
124                         if(aligned[i] == '-'){
125                                 aligned[i] = '.';
126                         }
127                         else{
128                                 break;
129                         }
130                 }
131         }
132         isAligned = 1;  
133 }
134
135 //********************************************************************************************************************
136
137 void Sequence::setPairwise(string sequence){
138         pairwise = sequence;
139 }
140
141 //********************************************************************************************************************
142
143 string Sequence::convert2ints() {
144         
145         if(unaligned == "")     {       /* need to throw an error */    }
146         
147         string processed;
148         
149         for(int i=0;i<unaligned.length();i++) {
150                 if(toupper(unaligned[i]) == 'A')                {       processed += '0';       }
151                 else if(toupper(unaligned[i]) == 'C')   {       processed += '1';       }
152                 else if(toupper(unaligned[i]) == 'G')   {       processed += '2';       }
153                 else if(toupper(unaligned[i]) == 'T')   {       processed += '3';       }
154                 else if(toupper(unaligned[i]) == 'U')   {       processed += '3';       }
155                 else                                                                    {       processed += '4';       }
156         }
157         return processed;
158 }
159
160 //********************************************************************************************************************
161
162 string Sequence::getName(){
163         return name;
164 }
165
166 //********************************************************************************************************************
167
168 string Sequence::getAligned(){
169         return aligned;
170 }
171
172 //********************************************************************************************************************
173
174 string Sequence::getPairwise(){
175         return pairwise;
176 }
177
178 //********************************************************************************************************************
179
180 string Sequence::getUnaligned(){
181         return unaligned;
182 }
183
184 //********************************************************************************************************************
185
186 int Sequence::getNumBases(){
187         return numBases;
188 }
189
190 //********************************************************************************************************************
191
192 void Sequence::printSequence(ostream& out){
193
194         out << ">" << name << endl;
195         if(isAligned){
196                 out << aligned << endl;
197         }
198         else{
199                 out << unaligned << endl;
200         }
201 }
202
203 //********************************************************************************************************************
204
205 int Sequence::getAlignLength(){
206         return alignmentLength;
207 }
208
209 //********************************************************************************************************************
210
211 int Sequence::getAmbigBases(){
212         if(ambigBases == -1){
213                 ambigBases = 0;
214                 for(int j=0;j<numBases;j++){
215                         if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
216                                 ambigBases++;
217                         }
218                 }
219         }       
220         
221         return ambigBases;
222 }
223
224 //********************************************************************************************************************
225
226 int Sequence::getLongHomoPolymer(){
227         if(longHomoPolymer == -1){
228                 longHomoPolymer = 1;
229                 int homoPolymer = 1;
230                 for(int j=1;j<numBases;j++){
231                         if(unaligned[j] == unaligned[j-1]){
232                                 homoPolymer++;
233                         }
234                         else{
235                                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
236                                 homoPolymer = 1;
237                         }
238                 }
239                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
240         }
241         return longHomoPolymer;
242 }
243
244 //********************************************************************************************************************
245
246 int Sequence::getStartPos(){
247         if(endPos == -1){
248                 for(int j = 0; j < alignmentLength; j++) {
249                         if(aligned[j] != '.'){
250                                 startPos = j + 1;
251                                 break;
252                         }
253                 }
254         }
255         if(isAligned == 0){     startPos = 1;   }
256
257         return startPos;
258 }
259
260 //********************************************************************************************************************
261
262 int Sequence::getEndPos(){
263         if(endPos == -1){
264                 for(int j=alignmentLength-1;j>=0;j--){
265                         if(aligned[j] != '.'){
266                                 endPos = j + 1;
267                                 break;
268                         }
269                 }
270         }
271         if(isAligned == 0){     endPos = numBases;      }
272         
273         return endPos;
274 }
275
276 //********************************************************************************************************************
277
278 bool Sequence::getIsAligned(){
279         return isAligned;
280 }
281
282 //********************************************************************************************************************
283
284 void Sequence::reverseComplement(){
285
286         string temp;
287         for(int i=numBases-1;i>=0;i--){
288                 if(unaligned[i] == 'A')         {       temp += 'T';    }
289                 else if(unaligned[i] == 'T'){   temp += 'A';    }
290                 else if(unaligned[i] == 'G'){   temp += 'C';    }
291                 else if(unaligned[i] == 'C'){   temp += 'G';    }
292                 else                                            {       temp += 'N';    }
293         }
294         unaligned = temp;
295         
296 }
297
298 //********************************************************************************************************************