*
*/
-using namespace std;
-
#include "sequence.hpp"
/***********************************************************************/
initialize();
name = newName;
- if(sequence.find_first_of('-') != string::npos) {
- setAligned(sequence);
- }
+
+ //setUnaligned removes any gap characters for us
setUnaligned(sequence);
+ setAligned(sequence);
}
//********************************************************************************************************************
-
+//Reads the next record from a fasta stream: the header token becomes name (minus its first character) and the body is passed to setAligned/setUnaligned. Records whose name starts with '#' are jumped over, but if the last sequence in a file is commented out this makes a blank seq.
Sequence::Sequence(ifstream& fastaFile){
- initialize();
-
- string accession; // provided a file handle to a fasta-formatted sequence file, read in the next
- fastaFile >> accession; // accession number and sequence we find...
- setName(accession);
- char letter;
+ initialize(); // defined elsewhere in the class
+ fastaFile >> name; // first whitespace-delimited token of the header line
+ name = name.substr(1); // drop the leading character (presumably the '>' marker). NOTE(review): substr(1) throws std::out_of_range when name is empty - confirm this is never reached at end of file
string sequence;
- while(fastaFile){
- letter= fastaFile.get();
- if(letter == '>'){
- fastaFile.putback(letter);
+ //read comments: loop while the current name starts with '#' and the stream is still readable
+ while ((name[0] == '#') && fastaFile) {
+ while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // discard the rest of the header line; stops at the first LF (10) or CR (13)
+ sequence = getCommentString(fastaFile); // consumes the following line; the value stored here is overwritten by getSequenceString below
+ // after skipping, read the next header token, or give up if the stream is no longer readable
+ if (fastaFile) {
+ fastaFile >> name;
+ name = name.substr(1); // same leading-character drop as above (same empty-name caveat)
+ }else {
+ name = ""; // nothing left to read: leave the name empty
break;
}
- else if(isprint(letter)){
- letter = toupper(letter);
- if(letter == 'U'){letter = 'T';}
- sequence += letter;
+ }
+
+ //read real sequence: first skip whatever follows the name on the header line
+ while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // discard the rest of the header line; stops at the first LF (10) or CR (13)
+
+ sequence = getSequenceString(fastaFile); // body characters up to the next '>' or the end of the stream
+
+ setAligned(sequence);
+ //setUnaligned removes any gap characters for us
+ setUnaligned(sequence);
+}
+//********************************************************************************************************************
+string Sequence::getSequenceString(ifstream& fastaFile) { // collects the characters of one record body and returns them as a string
+ try {
+ char letter;
+ string sequence = "";
+ // read one character at a time while the stream is in a good state
+ while(fastaFile){
+ letter= fastaFile.get();
+ if(letter == '>'){ // start of the next record: stop here
+ fastaFile.putback(letter); // leave the '>' in the stream for whoever reads next
+ break;
+ }
+ else if(isprint(letter)){ // keeps printable characters only, so line breaks are dropped. NOTE(review): isprint/toupper take a plain char here; values outside the unsigned char range are undefined per <cctype> - consider a cast
+ letter = toupper(letter);
+ if(letter == 'U'){letter = 'T';} // after upper-casing, every U is stored as T
+ sequence += letter;
+ }
}
+ return sequence;
}
-
- if(sequence.find_first_of('-') != string::npos){ // if there are any gaps in the sequence, assume that it is
- setAligned(sequence); // an alignment file
+ catch(exception& e) {
+ errorOut(e, "Sequence", "getSequenceString"); // errorOut is defined elsewhere; it is given the class and function names
+ exit(1); // any exception caught here ends the program with status 1
+ }
+}
+//********************************************************************************************************************
+//Consumes one line of a commented-out record: reads characters until the first '\r' or '\n'. A comment can contain '>', so '>' is not treated as a stop character here. Nothing is appended to the result, so the returned string is always empty.
+string Sequence::getCommentString(ifstream& fastaFile) {
+ try {
+ char letter;
+ string sequence = ""; // never modified below; returned as-is
+ // read and discard characters while the stream is in a good state
+ while(fastaFile){
+ letter=fastaFile.get();
+ if((letter == '\r') || (letter == '\n')){
+ gobble(fastaFile); //in case it's a \r\n situation; gobble is defined elsewhere - presumably it skips the remaining line-ending characters, TODO confirm
+ break;
+ }
+ }
+ // the line has been consumed; there is no content to hand back
+ return sequence;
+ }
+ catch(exception& e) {
+ errorOut(e, "Sequence", "getCommentString"); // errorOut is defined elsewhere; it is given the class and function names
+ exit(1); // any exception caught here ends the program with status 1
}
- setUnaligned(sequence); // also set the unaligned sequence file
}
//********************************************************************************************************************
void Sequence::setUnaligned(string sequence){
- if(sequence.find_first_of('-') != string::npos) {
+ if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) {
string temp = "";
for(int j=0;j<sequence.length();j++) {
if(isalpha(sequence[j])) { temp += sequence[j]; }
//if the alignment starts or ends with a gap, replace it with a period to indicate missing data
aligned = sequence;
alignmentLength = aligned.length();
+ setUnaligned(sequence);
if(aligned[0] == '-'){
for(int i=0;i<alignmentLength;i++){
else { temp += 'N'; }
}
unaligned = temp;
+ aligned = temp;
}