5 * Created by Pat Schloss on 12/15/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
10 #include "sequence.hpp"
12 /***********************************************************************/
14 m = MothurOut::getInstance();
17 /***********************************************************************/
18 Sequence::Sequence(string newName, string sequence) {
20 m = MothurOut::getInstance();
24 //setUnaligned removes any gap characters for us
25 setUnaligned(sequence);
29 m->errorOut(e, "Sequence", "Sequence");
33 /***********************************************************************/
34 Sequence::Sequence(string newName, string sequence, string justUnAligned) {
36 m = MothurOut::getInstance();
40 //setUnaligned removes any gap characters for us
41 setUnaligned(sequence);
44 m->errorOut(e, "Sequence", "Sequence");
49 //********************************************************************************************************************
50 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
51 Sequence::Sequence(istringstream& fastaString){
53 m = MothurOut::getInstance();
58 if (name.length() != 0) {
60 name = name.substr(1);
64 while ((name[0] == '#') && fastaString) {
65 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
66 sequence = getCommentString(fastaString);
70 name = name.substr(1);
77 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
79 sequence = getSequenceString(fastaString);
81 //setUnaligned removes any gap characters for us
82 setUnaligned(sequence);
83 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
87 m->errorOut(e, "Sequence", "Sequence");
91 //********************************************************************************************************************
92 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
93 Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
95 m = MothurOut::getInstance();
100 if (name.length() != 0) {
102 name = name.substr(1);
106 while ((name[0] == '#') && fastaString) {
107 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
108 sequence = getCommentString(fastaString);
112 name = name.substr(1);
119 while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
121 sequence = getSequenceString(fastaString);
123 //setUnaligned removes any gap characters for us
124 setUnaligned(sequence);
125 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
127 catch(exception& e) {
128 m->errorOut(e, "Sequence", "Sequence");
134 //********************************************************************************************************************
135 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
136 Sequence::Sequence(ifstream& fastaFile){
138 m = MothurOut::getInstance();
142 if (name.length() != 0) {
144 name = name.substr(1);
149 while ((name[0] == '#') && fastaFile) {
150 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
151 sequence = getCommentString(fastaFile);
155 name = name.substr(1);
163 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
165 sequence = getSequenceString(fastaFile);
167 setAligned(sequence);
168 //setUnaligned removes any gap characters for us
169 setUnaligned(sequence);
170 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
173 catch(exception& e) {
174 m->errorOut(e, "Sequence", "Sequence");
178 //********************************************************************************************************************
179 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
180 Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
182 m = MothurOut::getInstance();
186 if (name.length() != 0) {
187 name = name.substr(1);
191 while ((name[0] == '#') && fastaFile) {
192 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
193 sequence = getCommentString(fastaFile);
197 name = name.substr(1);
205 while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
207 sequence = getSequenceString(fastaFile);
209 //setUnaligned removes any gap characters for us
210 setUnaligned(sequence);
211 }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
214 catch(exception& e) {
215 m->errorOut(e, "Sequence", "Sequence");
220 //********************************************************************************************************************
221 string Sequence::getSequenceString(ifstream& fastaFile) {
224 string sequence = "";
227 letter= fastaFile.get();
229 fastaFile.putback(letter);
232 else if(isprint(letter)){
233 letter = toupper(letter);
234 if(letter == 'U'){letter = 'T';}
241 catch(exception& e) {
242 m->errorOut(e, "Sequence", "getSequenceString");
246 //********************************************************************************************************************
247 //comment can contain '>' so we need to account for that
248 string Sequence::getCommentString(ifstream& fastaFile) {
251 string sequence = "";
254 letter=fastaFile.get();
255 if((letter == '\r') || (letter == '\n')){
256 gobble(fastaFile); //in case its a \r\n situation
263 catch(exception& e) {
264 m->errorOut(e, "Sequence", "getCommentString");
268 //********************************************************************************************************************
269 string Sequence::getSequenceString(istringstream& fastaFile) {
272 string sequence = "";
274 while(!fastaFile.eof()){
275 letter= fastaFile.get();
278 fastaFile.putback(letter);
281 else if(isprint(letter)){
282 letter = toupper(letter);
283 if(letter == 'U'){letter = 'T';}
290 catch(exception& e) {
291 m->errorOut(e, "Sequence", "getSequenceString");
295 //********************************************************************************************************************
296 //comment can contain '>' so we need to account for that
297 string Sequence::getCommentString(istringstream& fastaFile) {
300 string sequence = "";
303 letter=fastaFile.get();
304 if((letter == '\r') || (letter == '\n')){
305 gobble(fastaFile); //in case its a \r\n situation
312 catch(exception& e) {
313 m->errorOut(e, "Sequence", "getCommentString");
317 //********************************************************************************************************************
319 void Sequence::initialize(){
331 longHomoPolymer = -1;
336 //********************************************************************************************************************
338 void Sequence::setName(string seqName) {
339 if(seqName[0] == '>') { name = seqName.substr(1); }
340 else { name = seqName; }
343 //********************************************************************************************************************
345 void Sequence::setUnaligned(string sequence){
347 if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) {
349 for(int j=0;j<sequence.length();j++) {
350 if(isalpha(sequence[j])) { temp += sequence[j]; }
355 unaligned = sequence;
357 numBases = unaligned.length();
361 //********************************************************************************************************************
363 void Sequence::setAligned(string sequence){
365 //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
367 alignmentLength = aligned.length();
368 setUnaligned(sequence);
370 if(aligned[0] == '-'){
371 for(int i=0;i<alignmentLength;i++){
372 if(aligned[i] == '-'){
379 for(int i=alignmentLength-1;i>=0;i--){
380 if(aligned[i] == '-'){
391 //********************************************************************************************************************
393 void Sequence::setPairwise(string sequence){
397 //********************************************************************************************************************
399 string Sequence::convert2ints() {
401 if(unaligned == "") { /* need to throw an error */ }
405 for(int i=0;i<unaligned.length();i++) {
406 if(toupper(unaligned[i]) == 'A') { processed += '0'; }
407 else if(toupper(unaligned[i]) == 'C') { processed += '1'; }
408 else if(toupper(unaligned[i]) == 'G') { processed += '2'; }
409 else if(toupper(unaligned[i]) == 'T') { processed += '3'; }
410 else if(toupper(unaligned[i]) == 'U') { processed += '3'; }
411 else { processed += '4'; }
416 //********************************************************************************************************************
418 string Sequence::getName(){
422 //********************************************************************************************************************
424 string Sequence::getAligned(){
425 if(isAligned == 0) { return unaligned; }
426 else { return aligned; }
429 //********************************************************************************************************************
431 string Sequence::getPairwise(){
435 //********************************************************************************************************************
437 string Sequence::getUnaligned(){
441 //********************************************************************************************************************
443 int Sequence::getNumBases(){
447 //********************************************************************************************************************
449 void Sequence::printSequence(ostream& out){
451 out << ">" << name << endl;
453 out << aligned << endl;
456 out << unaligned << endl;
460 //********************************************************************************************************************
462 int Sequence::getAlignLength(){
463 return alignmentLength;
466 //********************************************************************************************************************
468 int Sequence::getAmbigBases(){
469 if(ambigBases == -1){
471 for(int j=0;j<numBases;j++){
472 if(unaligned[j] != 'A' && unaligned[j] != 'T' && unaligned[j] != 'G' && unaligned[j] != 'C'){
481 //********************************************************************************************************************
483 void Sequence::removeAmbigBases(){
485 for(int j=0;j<alignmentLength;j++){
486 if(aligned[j] != 'A' && aligned[j] != 'T' && aligned[j] != 'G' && aligned[j] != 'C'){
490 setUnaligned(aligned);
493 //********************************************************************************************************************
495 int Sequence::getLongHomoPolymer(){
496 if(longHomoPolymer == -1){
499 for(int j=1;j<numBases;j++){
500 if(unaligned[j] == unaligned[j-1]){
504 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
508 if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; }
510 return longHomoPolymer;
513 //********************************************************************************************************************
515 int Sequence::getStartPos(){
517 for(int j = 0; j < alignmentLength; j++) {
518 if(aligned[j] != '.'){
524 if(isAligned == 0){ startPos = 1; }
529 //********************************************************************************************************************
531 int Sequence::getEndPos(){
533 for(int j=alignmentLength-1;j>=0;j--){
534 if(aligned[j] != '.'){
540 if(isAligned == 0){ endPos = numBases; }
545 //********************************************************************************************************************
547 bool Sequence::getIsAligned(){
551 //********************************************************************************************************************
553 void Sequence::reverseComplement(){
556 for(int i=numBases-1;i>=0;i--){
557 if(unaligned[i] == 'A') { temp += 'T'; }
558 else if(unaligned[i] == 'T'){ temp += 'A'; }
559 else if(unaligned[i] == 'G'){ temp += 'C'; }
560 else if(unaligned[i] == 'C'){ temp += 'G'; }
561 else { temp += 'N'; }
567 /**************************************************************************************************/