5 * Created by Pat Schloss on 12/15/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
10 #include "sequence.hpp"
12 /***********************************************************************/
18 /***********************************************************************/
20 Sequence::Sequence(string newName, string sequence) {
24 if(sequence.find_first_of('-') != string::npos) {
27 setUnaligned(sequence);
30 //********************************************************************************************************************
32 Sequence::Sequence(ifstream& fastaFile){
36 name = name.substr(1);
39 while ((c = fastaFile.get()) != EOF) { if (c == 10){ break; } } // get rest of line if there's any crap there
45 letter= fastaFile.get();
47 fastaFile.putback(letter);
50 else if(isprint(letter)){
51 letter = toupper(letter);
52 if(letter == 'U'){letter = 'T';}
57 if(sequence.find_first_of('-') != string::npos){ // if there are any gaps in the sequence, assume that it is
58 setAligned(sequence); // an alignment file
60 setUnaligned(sequence); // also set the unaligned sequence file
63 //********************************************************************************************************************
65 void Sequence::initialize(){
82 //********************************************************************************************************************
84 void Sequence::setName(string seqName) {
85 if(seqName[0] == '>') { name = seqName.substr(1); }
86 else { name = seqName; }
89 //********************************************************************************************************************
91 void Sequence::setUnaligned(string sequence){
93 if(sequence.find_first_of('-') != string::npos) {
95 for(int j=0;j<sequence.length();j++) {
96 if(isalpha(sequence[j])) { temp += sequence[j]; }
101 unaligned = sequence;
103 numBases = unaligned.length();
107 //********************************************************************************************************************
109 void Sequence::setAligned(string sequence){
111 //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
113 alignmentLength = aligned.length();
115 if(aligned[0] == '-'){
116 for(int i=0;i<alignmentLength;i++){
117 if(aligned[i] == '-'){
124 for(int i=alignmentLength-1;i>=0;i--){
125 if(aligned[i] == '-'){
136 //********************************************************************************************************************
138 void Sequence::setPairwise(string sequence){
142 //********************************************************************************************************************
144 string Sequence::convert2ints() {
146 if(unaligned == "") { /* need to throw an error */ }
150 for(int i=0;i<unaligned.length();i++) {
151 if(toupper(unaligned[i]) == 'A') { processed += '0'; }
152 else if(toupper(unaligned[i]) == 'C') { processed += '1'; }
153 else if(toupper(unaligned[i]) == 'G') { processed += '2'; }
154 else if(toupper(unaligned[i]) == 'T') { processed += '3'; }
155 else if(toupper(unaligned[i]) == 'U') { processed += '3'; }
156 else { processed += '4'; }
161 //********************************************************************************************************************
163 string Sequence::getName(){
167 //********************************************************************************************************************
169 string Sequence::getAligned(){
173 //********************************************************************************************************************
175 string Sequence::getPairwise(){
179 //********************************************************************************************************************
181 string Sequence::getUnaligned(){
185 //********************************************************************************************************************
187 int Sequence::getNumBases(){
191 //********************************************************************************************************************
193 void Sequence::printSequence(ostream& out){
195 out << ">" << name << endl;
197 out << aligned << endl;
200 out << unaligned << endl;
204 //********************************************************************************************************************
206 int Sequence::getAlignLength(){
207 return alignmentLength;
210 //********************************************************************************************************************
212 int Sequence::getAmbigBases(){
213 if(ambigBases == -1){
215 for(int j=0;j<numBases;j++){
216 if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
225 //********************************************************************************************************************
227 int Sequence::getLongHomoPolymer(){
228 if(longHomoPolymer == -1){
231 for(int j=1;j<numBases;j++){
232 if(unaligned[j] == unaligned[j-1]){
236 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
240 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
242 return longHomoPolymer;
245 //********************************************************************************************************************
247 int Sequence::getStartPos(){
249 for(int j = 0; j < alignmentLength; j++) {
250 if(aligned[j] != '.'){
256 if(isAligned == 0){ startPos = 1; }
261 //********************************************************************************************************************
263 int Sequence::getEndPos(){
265 for(int j=alignmentLength-1;j>=0;j--){
266 if(aligned[j] != '.'){
272 if(isAligned == 0){ endPos = numBases; }
277 //********************************************************************************************************************
279 bool Sequence::getIsAligned(){
283 //********************************************************************************************************************
285 void Sequence::reverseComplement(){
288 for(int i=numBases-1;i>=0;i--){
289 if(unaligned[i] == 'A') { temp += 'T'; }
290 else if(unaligned[i] == 'T'){ temp += 'A'; }
291 else if(unaligned[i] == 'G'){ temp += 'C'; }
292 else if(unaligned[i] == 'C'){ temp += 'G'; }
293 else { temp += 'N'; }
299 //********************************************************************************************************************