5 * Created by Pat Schloss on 12/15/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
10 #include "sequence.hpp"
12 /***********************************************************************/
14 m = MothurOut::getInstance();
17 /***********************************************************************/
18 Sequence::Sequence(string newName, string sequence) {
20 m = MothurOut::getInstance();
24 //setUnaligned removes any gap characters for us
25 setUnaligned(sequence);
29 m->errorOut(e, "Sequence", "Sequence");
33 /***********************************************************************/
34 Sequence::Sequence(string newName, string sequence, string justUnAligned) {
36 m = MothurOut::getInstance();
40 //setUnaligned removes any gap characters for us
41 setUnaligned(sequence);
44 m->errorOut(e, "Sequence", "Sequence");
49 //********************************************************************************************************************
50 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
51 Sequence::Sequence(istringstream& fastaString){
53 m = MothurOut::getInstance();
58 if (name.length() != 0) {
60 name = name.substr(1);
64 while ((name[0] == '#') && fastaString) {
65 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
66 sequence = getCommentString(fastaString);
70 name = name.substr(1);
77 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
79 sequence = getSequenceString(fastaString);
81 //setUnaligned removes any gap characters for us
82 setUnaligned(sequence);
83 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
87 m->errorOut(e, "Sequence", "Sequence");
91 //********************************************************************************************************************
92 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
93 Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
95 m = MothurOut::getInstance();
100 if (name.length() != 0) {
102 name = name.substr(1);
106 while ((name[0] == '#') && fastaString) {
107 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
108 sequence = getCommentString(fastaString);
112 name = name.substr(1);
119 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
121 sequence = getSequenceString(fastaString);
123 //setUnaligned removes any gap characters for us
124 setUnaligned(sequence);
125 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
128 catch(exception& e) {
129 m->errorOut(e, "Sequence", "Sequence");
135 //********************************************************************************************************************
136 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
137 Sequence::Sequence(ifstream& fastaFile){
139 m = MothurOut::getInstance();
143 if (name.length() != 0) {
145 name = name.substr(1);
150 while ((name[0] == '#') && fastaFile) {
151 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
152 sequence = getCommentString(fastaFile);
156 name = name.substr(1);
164 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
166 sequence = getSequenceString(fastaFile);
168 setAligned(sequence);
169 //setUnaligned removes any gap characters for us
170 setUnaligned(sequence);
171 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
174 catch(exception& e) {
175 m->errorOut(e, "Sequence", "Sequence");
179 //********************************************************************************************************************
180 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
181 Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
183 m = MothurOut::getInstance();
187 if (name.length() != 0) {
188 name = name.substr(1);
192 while ((name[0] == '#') && fastaFile) {
193 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
194 sequence = getCommentString(fastaFile);
198 name = name.substr(1);
206 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
208 sequence = getSequenceString(fastaFile);
210 //setUnaligned removes any gap characters for us
211 setUnaligned(sequence);
212 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
215 catch(exception& e) {
216 m->errorOut(e, "Sequence", "Sequence");
221 //********************************************************************************************************************
222 string Sequence::getSequenceString(ifstream& fastaFile) {
225 string sequence = "";
228 letter= fastaFile.get();
230 fastaFile.putback(letter);
233 else if(isprint(letter)){
234 letter = toupper(letter);
235 if(letter == 'U'){letter = 'T';}
242 catch(exception& e) {
243 m->errorOut(e, "Sequence", "getSequenceString");
247 //********************************************************************************************************************
248 //comment can contain '>' so we need to account for that
249 string Sequence::getCommentString(ifstream& fastaFile) {
252 string sequence = "";
255 letter=fastaFile.get();
256 if((letter == '\r') || (letter == '\n')){
257 m->gobble(fastaFile); //in case its a \r\n situation
264 catch(exception& e) {
265 m->errorOut(e, "Sequence", "getCommentString");
269 //********************************************************************************************************************
270 string Sequence::getSequenceString(istringstream& fastaFile) {
273 string sequence = "";
275 while(!fastaFile.eof()){
276 letter= fastaFile.get();
279 fastaFile.putback(letter);
282 else if(isprint(letter)){
283 letter = toupper(letter);
284 if(letter == 'U'){letter = 'T';}
291 catch(exception& e) {
292 m->errorOut(e, "Sequence", "getSequenceString");
296 //********************************************************************************************************************
297 //comment can contain '>' so we need to account for that
298 string Sequence::getCommentString(istringstream& fastaFile) {
301 string sequence = "";
304 letter=fastaFile.get();
305 if((letter == '\r') || (letter == '\n')){
306 m->gobble(fastaFile); //in case its a \r\n situation
313 catch(exception& e) {
314 m->errorOut(e, "Sequence", "getCommentString");
318 //********************************************************************************************************************
320 void Sequence::initialize(){
332 longHomoPolymer = -1;
337 //********************************************************************************************************************
339 void Sequence::setName(string seqName) {
340 if(seqName[0] == '>') { name = seqName.substr(1); }
341 else { name = seqName; }
344 //********************************************************************************************************************
346 void Sequence::setUnaligned(string sequence){
348 if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) {
350 for(int j=0;j<sequence.length();j++) {
351 if(isalpha(sequence[j])) { temp += sequence[j]; }
356 unaligned = sequence;
358 numBases = unaligned.length();
362 //********************************************************************************************************************
364 void Sequence::setAligned(string sequence){
366 //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
368 alignmentLength = aligned.length();
369 setUnaligned(sequence);
371 if(aligned[0] == '-'){
372 for(int i=0;i<alignmentLength;i++){
373 if(aligned[i] == '-'){
380 for(int i=alignmentLength-1;i>=0;i--){
381 if(aligned[i] == '-'){
392 //********************************************************************************************************************
394 void Sequence::setPairwise(string sequence){
398 //********************************************************************************************************************
400 string Sequence::convert2ints() {
402 if(unaligned == "") { /* need to throw an error */ }
406 for(int i=0;i<unaligned.length();i++) {
407 if(toupper(unaligned[i]) == 'A') { processed += '0'; }
408 else if(toupper(unaligned[i]) == 'C') { processed += '1'; }
409 else if(toupper(unaligned[i]) == 'G') { processed += '2'; }
410 else if(toupper(unaligned[i]) == 'T') { processed += '3'; }
411 else if(toupper(unaligned[i]) == 'U') { processed += '3'; }
412 else { processed += '4'; }
417 //********************************************************************************************************************
419 string Sequence::getName(){
423 //********************************************************************************************************************
425 string Sequence::getAligned(){
426 if(isAligned == 0) { return unaligned; }
427 else { return aligned; }
430 //********************************************************************************************************************
432 string Sequence::getPairwise(){
436 //********************************************************************************************************************
438 string Sequence::getUnaligned(){
442 //********************************************************************************************************************
444 int Sequence::getNumBases(){
448 //********************************************************************************************************************
450 void Sequence::printSequence(ostream& out){
452 out << ">" << name << endl;
454 out << aligned << endl;
457 out << unaligned << endl;
461 //********************************************************************************************************************
463 int Sequence::getAlignLength(){
464 return alignmentLength;
467 //********************************************************************************************************************
469 int Sequence::getAmbigBases(){
470 if(ambigBases == -1){
472 for(int j=0;j<numBases;j++){
473 if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
482 //********************************************************************************************************************
484 void Sequence::removeAmbigBases(){
486 for(int j=0;j<alignmentLength;j++){
487 if(aligned[j] != 'A' && aligned[j] != 'T' && aligned[j] != 'G' && aligned[j] != 'C'){
491 setUnaligned(aligned);
494 //********************************************************************************************************************
496 int Sequence::getLongHomoPolymer(){
497 if(longHomoPolymer == -1){
500 for(int j=1;j<numBases;j++){
501 if(unaligned[j] == unaligned[j-1]){
505 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
509 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
511 return longHomoPolymer;
514 //********************************************************************************************************************
516 int Sequence::getStartPos(){
518 for(int j = 0; j < alignmentLength; j++) {
519 if(aligned[j] != '.'){
525 if(isAligned == 0){ startPos = 1; }
530 //********************************************************************************************************************
532 int Sequence::getEndPos(){
534 for(int j=alignmentLength-1;j>=0;j--){
535 if(aligned[j] != '.'){
541 if(isAligned == 0){ endPos = numBases; }
546 //********************************************************************************************************************
548 bool Sequence::getIsAligned(){
552 //********************************************************************************************************************
554 void Sequence::reverseComplement(){
557 for(int i=numBases-1;i>=0;i--){
558 if(unaligned[i] == 'A') { temp += 'T'; }
559 else if(unaligned[i] == 'T'){ temp += 'A'; }
560 else if(unaligned[i] == 'G'){ temp += 'C'; }
561 else if(unaligned[i] == 'C'){ temp += 'G'; }
562 else { temp += 'N'; }
569 //********************************************************************************************************************
571 void Sequence::trim(int length){
573 if(numBases > length){
574 unaligned = unaligned.substr(0,length);
580 ///**************************************************************************************************/