5 * Created by Pat Schloss on 12/15/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
10 #include "sequence.hpp"
12 /***********************************************************************/
14 m = MothurOut::getInstance();
17 /***********************************************************************/
18 Sequence::Sequence(string newName, string sequence) {
20 m = MothurOut::getInstance();
24 //setUnaligned removes any gap characters for us
25 setUnaligned(sequence);
29 m->errorOut(e, "Sequence", "Sequence");
33 /***********************************************************************/
34 Sequence::Sequence(string newName, string sequence, string justUnAligned) {
36 m = MothurOut::getInstance();
40 //setUnaligned removes any gap characters for us
41 setUnaligned(sequence);
44 m->errorOut(e, "Sequence", "Sequence");
49 //********************************************************************************************************************
50 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
51 Sequence::Sequence(istringstream& fastaString){
53 m = MothurOut::getInstance();
58 if (name.length() != 0) {
60 name = name.substr(1);
64 while ((name[0] == '#') && fastaString) {
65 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
66 sequence = getCommentString(fastaString);
70 name = name.substr(1);
77 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
79 sequence = getSequenceString(fastaString);
81 //setUnaligned removes any gap characters for us
82 setUnaligned(sequence);
83 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
87 m->errorOut(e, "Sequence", "Sequence");
91 //********************************************************************************************************************
92 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
93 Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
95 m = MothurOut::getInstance();
100 if (name.length() != 0) {
102 name = name.substr(1);
106 while ((name[0] == '#') && fastaString) {
107 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
108 sequence = getCommentString(fastaString);
112 name = name.substr(1);
119 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
121 sequence = getSequenceString(fastaString);
123 //setUnaligned removes any gap characters for us
124 setUnaligned(sequence);
125 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
128 catch(exception& e) {
129 m->errorOut(e, "Sequence", "Sequence");
135 //********************************************************************************************************************
136 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
137 Sequence::Sequence(ifstream& fastaFile){
139 m = MothurOut::getInstance();
143 if (name.length() != 0) {
145 name = name.substr(1);
150 while ((name[0] == '#') && fastaFile) {
151 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
152 sequence = getCommentString(fastaFile);
156 name = name.substr(1);
164 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
166 sequence = getSequenceString(fastaFile);
168 setAligned(sequence);
169 //setUnaligned removes any gap characters for us
170 setUnaligned(sequence);
171 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
174 catch(exception& e) {
175 m->errorOut(e, "Sequence", "Sequence");
179 //********************************************************************************************************************
180 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
181 Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
183 m = MothurOut::getInstance();
187 if (name.length() != 0) {
188 name = name.substr(1);
192 while ((name[0] == '#') && fastaFile) {
193 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
194 sequence = getCommentString(fastaFile);
198 name = name.substr(1);
206 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
208 sequence = getSequenceString(fastaFile);
210 //setUnaligned removes any gap characters for us
211 setUnaligned(sequence);
212 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
215 catch(exception& e) {
216 m->errorOut(e, "Sequence", "Sequence");
221 //********************************************************************************************************************
222 string Sequence::getSequenceString(ifstream& fastaFile) {
225 string sequence = "";
228 letter= fastaFile.get();
230 fastaFile.putback(letter);
233 else if(isprint(letter)){
234 letter = toupper(letter);
235 if(letter == 'U'){letter = 'T';}
236 if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G' && letter != 'C'){
245 catch(exception& e) {
246 m->errorOut(e, "Sequence", "getSequenceString");
250 //********************************************************************************************************************
251 //comment can contain '>' so we need to account for that
252 string Sequence::getCommentString(ifstream& fastaFile) {
255 string sequence = "";
258 letter=fastaFile.get();
259 if((letter == '\r') || (letter == '\n')){
260 m->gobble(fastaFile); //in case its a \r\n situation
267 catch(exception& e) {
268 m->errorOut(e, "Sequence", "getCommentString");
272 //********************************************************************************************************************
273 string Sequence::getSequenceString(istringstream& fastaFile) {
276 string sequence = "";
278 while(!fastaFile.eof()){
279 letter= fastaFile.get();
282 fastaFile.putback(letter);
285 else if(isprint(letter)){
286 letter = toupper(letter);
287 if(letter == 'U'){letter = 'T';}
294 catch(exception& e) {
295 m->errorOut(e, "Sequence", "getSequenceString");
299 //********************************************************************************************************************
300 //comment can contain '>' so we need to account for that
301 string Sequence::getCommentString(istringstream& fastaFile) {
304 string sequence = "";
307 letter=fastaFile.get();
308 if((letter == '\r') || (letter == '\n')){
309 m->gobble(fastaFile); //in case its a \r\n situation
316 catch(exception& e) {
317 m->errorOut(e, "Sequence", "getCommentString");
321 //********************************************************************************************************************
323 void Sequence::initialize(){
335 longHomoPolymer = -1;
340 //********************************************************************************************************************
342 void Sequence::setName(string seqName) {
343 if(seqName[0] == '>') { name = seqName.substr(1); }
344 else { name = seqName; }
347 //********************************************************************************************************************
349 void Sequence::setUnaligned(string sequence){
351 if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) {
353 for(int j=0;j<sequence.length();j++) {
354 if(isalpha(sequence[j])) { temp += sequence[j]; }
359 unaligned = sequence;
361 numBases = unaligned.length();
365 //********************************************************************************************************************
367 void Sequence::setAligned(string sequence){
369 //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
371 alignmentLength = aligned.length();
372 setUnaligned(sequence);
374 if(aligned[0] == '-'){
375 for(int i=0;i<alignmentLength;i++){
376 if(aligned[i] == '-'){
383 for(int i=alignmentLength-1;i>=0;i--){
384 if(aligned[i] == '-'){
395 //********************************************************************************************************************
397 void Sequence::setPairwise(string sequence){
401 //********************************************************************************************************************
403 string Sequence::convert2ints() {
405 if(unaligned == "") { /* need to throw an error */ }
409 for(int i=0;i<unaligned.length();i++) {
410 if(toupper(unaligned[i]) == 'A') { processed += '0'; }
411 else if(toupper(unaligned[i]) == 'C') { processed += '1'; }
412 else if(toupper(unaligned[i]) == 'G') { processed += '2'; }
413 else if(toupper(unaligned[i]) == 'T') { processed += '3'; }
414 else if(toupper(unaligned[i]) == 'U') { processed += '3'; }
415 else { processed += '4'; }
420 //********************************************************************************************************************
422 string Sequence::getName(){
426 //********************************************************************************************************************
428 string Sequence::getAligned(){
429 if(isAligned == 0) { return unaligned; }
430 else { return aligned; }
433 //********************************************************************************************************************
435 string Sequence::getInlineSeq(){
436 return name + '\t' + aligned;
440 //********************************************************************************************************************
442 string Sequence::getPairwise(){
446 //********************************************************************************************************************
448 string Sequence::getUnaligned(){
452 //********************************************************************************************************************
454 int Sequence::getNumBases(){
458 //********************************************************************************************************************
460 void Sequence::printSequence(ostream& out){
462 out << ">" << name << endl;
464 out << aligned << endl;
467 out << unaligned << endl;
471 //********************************************************************************************************************
473 int Sequence::getAlignLength(){
474 return alignmentLength;
477 //********************************************************************************************************************
479 int Sequence::getAmbigBases(){
480 if(ambigBases == -1){
482 for(int j=0;j<numBases;j++){
483 if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
492 //********************************************************************************************************************
494 void Sequence::removeAmbigBases(){
496 for(int j=0;j<alignmentLength;j++){
497 if(aligned[j] != 'A' && aligned[j] != 'T' && aligned[j] != 'G' && aligned[j] != 'C'){
501 setUnaligned(aligned);
504 //********************************************************************************************************************
506 int Sequence::getLongHomoPolymer(){
507 if(longHomoPolymer == -1){
510 for(int j=1;j<numBases;j++){
511 if(unaligned[j] == unaligned[j-1]){
515 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
519 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
521 return longHomoPolymer;
524 //********************************************************************************************************************
526 int Sequence::getStartPos(){
528 for(int j = 0; j < alignmentLength; j++) {
529 if(aligned[j] != '.'){
535 if(isAligned == 0){ startPos = 1; }
540 //********************************************************************************************************************
542 void Sequence::padToPos(int start){
544 for(int j = startPos-1; j < start-1; j++) {
551 //********************************************************************************************************************
553 int Sequence::getEndPos(){
555 for(int j=alignmentLength-1;j>=0;j--){
556 if(aligned[j] != '.'){
562 if(isAligned == 0){ endPos = numBases; }
567 //********************************************************************************************************************
569 void Sequence::padFromPos(int end){
571 for(int j = end; j < endPos; j++) {
578 //********************************************************************************************************************
580 bool Sequence::getIsAligned(){
584 //********************************************************************************************************************
586 void Sequence::reverseComplement(){
589 for(int i=numBases-1;i>=0;i--){
590 if(unaligned[i] == 'A') { temp += 'T'; }
591 else if(unaligned[i] == 'T'){ temp += 'A'; }
592 else if(unaligned[i] == 'G'){ temp += 'C'; }
593 else if(unaligned[i] == 'C'){ temp += 'G'; }
594 else { temp += 'N'; }
601 //********************************************************************************************************************
603 void Sequence::trim(int length){
605 if(numBases > length){
606 unaligned = unaligned.substr(0,length);
612 ///**************************************************************************************************/