]> git.donarmstrong.com Git - mothur.git/blob - sequence.cpp
added trim.seqs command
[mothur.git] / sequence.cpp
1 /*
2  *  sequence.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "sequence.hpp"
11
12 /***********************************************************************/
13
14 Sequence::Sequence(){
15         initialize();
16 }
17
18 /***********************************************************************/
19
20 Sequence::Sequence(string newName, string sequence) {
21
22         initialize();   
23         name = newName;
24         if(sequence.find_first_of('-') != string::npos) {
25                 setAligned(sequence);
26         }
27         setUnaligned(sequence);
28         
29 }
30 //********************************************************************************************************************
31
32 Sequence::Sequence(ifstream& fastaFile){
33
34         initialize();
35         fastaFile >> name;
36         name = name.substr(1);
37         char c;
38         
39         while ((c = fastaFile.get()) != EOF)    {       if (c == 10){   break;  }       } // get rest of line if there's any crap there
40         
41         char letter;
42         string sequence;
43         
44         while(fastaFile){
45                 letter= fastaFile.get();
46                 if(letter == '>'){
47                         fastaFile.putback(letter);
48                         break;
49                 }
50                 else if(isprint(letter)){
51                         letter = toupper(letter);
52                         if(letter == 'U'){letter = 'T';}
53                         sequence += letter;
54                 }
55         }
56
57         if(sequence.find_first_of('-') != string::npos){        //      if there are any gaps in the sequence, assume that it is
58                 setAligned(sequence);                                                   //      an alignment file
59         }
60         setUnaligned(sequence);                                                         //      also set the unaligned sequence file
61 }
62
63 //********************************************************************************************************************
64
65 void Sequence::initialize(){
66         
67         name = "";
68         unaligned = "";
69         aligned = "";
70         pairwise = "";
71         
72         numBases = 0;
73         alignmentLength = 0;
74         isAligned = 0;
75         startPos = -1;
76         endPos = -1;
77         longHomoPolymer = -1;
78         ambigBases = -1;
79         
80 }       
81
82 //********************************************************************************************************************
83
84 void Sequence::setName(string seqName) {
85         if(seqName[0] == '>')   {       name = seqName.substr(1);       }
86         else                                    {       name = seqName;                         }
87 }
88
89 //********************************************************************************************************************
90
91 void Sequence::setUnaligned(string sequence){
92         
93         if(sequence.find_first_of('-') != string::npos) {
94                 string temp = "";
95                 for(int j=0;j<sequence.length();j++) {
96                         if(isalpha(sequence[j]))        {       temp += sequence[j];    }
97                 }
98                 unaligned = temp;
99         }
100         else {
101                 unaligned = sequence;
102         }
103         numBases = unaligned.length();
104         
105 }
106
107 //********************************************************************************************************************
108
109 void Sequence::setAligned(string sequence){
110         
111         //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
112         aligned = sequence;
113         alignmentLength = aligned.length();
114
115         if(aligned[0] == '-'){
116                 for(int i=0;i<alignmentLength;i++){
117                         if(aligned[i] == '-'){
118                                 aligned[i] = '.';
119                         }
120                         else{
121                                 break;
122                         }
123                 }
124                 for(int i=alignmentLength-1;i>=0;i--){
125                         if(aligned[i] == '-'){
126                                 aligned[i] = '.';
127                         }
128                         else{
129                                 break;
130                         }
131                 }
132         }
133         isAligned = 1;  
134 }
135
136 //********************************************************************************************************************
137
138 void Sequence::setPairwise(string sequence){
139         pairwise = sequence;
140 }
141
142 //********************************************************************************************************************
143
144 string Sequence::convert2ints() {
145         
146         if(unaligned == "")     {       /* need to throw an error */    }
147         
148         string processed;
149         
150         for(int i=0;i<unaligned.length();i++) {
151                 if(toupper(unaligned[i]) == 'A')                {       processed += '0';       }
152                 else if(toupper(unaligned[i]) == 'C')   {       processed += '1';       }
153                 else if(toupper(unaligned[i]) == 'G')   {       processed += '2';       }
154                 else if(toupper(unaligned[i]) == 'T')   {       processed += '3';       }
155                 else if(toupper(unaligned[i]) == 'U')   {       processed += '3';       }
156                 else                                                                    {       processed += '4';       }
157         }
158         return processed;
159 }
160
161 //********************************************************************************************************************
162
163 string Sequence::getName(){
164         return name;
165 }
166
167 //********************************************************************************************************************
168
169 string Sequence::getAligned(){
170         return aligned;
171 }
172
173 //********************************************************************************************************************
174
175 string Sequence::getPairwise(){
176         return pairwise;
177 }
178
179 //********************************************************************************************************************
180
181 string Sequence::getUnaligned(){
182         return unaligned;
183 }
184
185 //********************************************************************************************************************
186
187 int Sequence::getNumBases(){
188         return numBases;
189 }
190
191 //********************************************************************************************************************
192
193 void Sequence::printSequence(ostream& out){
194
195         out << ">" << name << endl;
196         if(isAligned){
197                 out << aligned << endl;
198         }
199         else{
200                 out << unaligned << endl;
201         }
202 }
203
204 //********************************************************************************************************************
205
206 int Sequence::getAlignLength(){
207         return alignmentLength;
208 }
209
210 //********************************************************************************************************************
211
212 int Sequence::getAmbigBases(){
213         if(ambigBases == -1){
214                 ambigBases = 0;
215                 for(int j=0;j<numBases;j++){
216                         if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
217                                 ambigBases++;
218                         }
219                 }
220         }       
221         
222         return ambigBases;
223 }
224
225 //********************************************************************************************************************
226
227 int Sequence::getLongHomoPolymer(){
228         if(longHomoPolymer == -1){
229                 longHomoPolymer = 1;
230                 int homoPolymer = 1;
231                 for(int j=1;j<numBases;j++){
232                         if(unaligned[j] == unaligned[j-1]){
233                                 homoPolymer++;
234                         }
235                         else{
236                                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
237                                 homoPolymer = 1;
238                         }
239                 }
240                 if(homoPolymer > longHomoPolymer){      longHomoPolymer = homoPolymer;  }
241         }
242         return longHomoPolymer;
243 }
244
245 //********************************************************************************************************************
246
247 int Sequence::getStartPos(){
248         if(endPos == -1){
249                 for(int j = 0; j < alignmentLength; j++) {
250                         if(aligned[j] != '.'){
251                                 startPos = j + 1;
252                                 break;
253                         }
254                 }
255         }
256         if(isAligned == 0){     startPos = 1;   }
257
258         return startPos;
259 }
260
261 //********************************************************************************************************************
262
263 int Sequence::getEndPos(){
264         if(endPos == -1){
265                 for(int j=alignmentLength-1;j>=0;j--){
266                         if(aligned[j] != '.'){
267                                 endPos = j + 1;
268                                 break;
269                         }
270                 }
271         }
272         if(isAligned == 0){     endPos = numBases;      }
273         
274         return endPos;
275 }
276
277 //********************************************************************************************************************
278
279 bool Sequence::getIsAligned(){
280         return isAligned;
281 }
282
283 //********************************************************************************************************************
284
285 void Sequence::reverseComplement(){
286
287         string temp;
288         for(int i=numBases-1;i>=0;i--){
289                 if(unaligned[i] == 'A')         {       temp += 'T';    }
290                 else if(unaligned[i] == 'T'){   temp += 'A';    }
291                 else if(unaligned[i] == 'G'){   temp += 'C';    }
292                 else if(unaligned[i] == 'C'){   temp += 'G';    }
293                 else                                            {       temp += 'N';    }
294         }
295         unaligned = temp;
296         
297 }
298
299 //********************************************************************************************************************