X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sequence.cpp;h=752e081e367fc6d572b5622e0b4eb6003a28f9bb;hb=96b36196d49a3d1f6bc49a26a9d2aa2da7ff876e;hp=e9487c519e0d3e0378266eb537d15e28979546dc;hpb=a6c698b20eda3671d22466ab6b98b36331a30804;p=mothur.git diff --git a/sequence.cpp b/sequence.cpp index e9487c5..752e081 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -7,8 +7,6 @@ * */ -using namespace std; - #include "sequence.hpp" /***********************************************************************/ @@ -23,42 +21,91 @@ Sequence::Sequence(string newName, string sequence) { initialize(); name = newName; - if(sequence.find_first_of('-') != string::npos) { - setAligned(sequence); - isAligned = 1; - } + + //setUnaligned removes any gap characters for us setUnaligned(sequence); + setAligned(sequence); } //******************************************************************************************************************** - +//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Sequence::Sequence(ifstream& fastaFile){ - - string accession; // provided a file handle to a fasta-formatted sequence file, read in the next - fastaFile >> accession; // accession number and sequence we find... - setName(accession); - char letter; + initialize(); + fastaFile >> name; + name = name.substr(1); string sequence; - while(fastaFile){ - letter= fastaFile.get(); - if(letter == '>'){ - fastaFile.putback(letter); + //read comments + while ((name[0] == '#') && fastaFile) { + while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + sequence = getCommentString(fastaFile); + + if (fastaFile) { + fastaFile >> name; + name = name.substr(1); + }else { + name = ""; break; } - else if(isprint(letter)){ - letter = toupper(letter); - if(letter == 'U'){letter = 'T';} - sequence += letter; + } + + //read real sequence + while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + + sequence = getSequenceString(fastaFile); + + setAligned(sequence); + //setUnaligned removes any gap characters for us + setUnaligned(sequence); +} +//******************************************************************************************************************** +string Sequence::getSequenceString(ifstream& fastaFile) { + try { + char letter; + string sequence = ""; + + while(fastaFile){ + letter= fastaFile.get(); + if(letter == '>'){ + fastaFile.putback(letter); + break; + } + else if(isprint(letter)){ + letter = toupper(letter); + if(letter == 'U'){letter = 'T';} + sequence += letter; + } } + return sequence; } - - if(sequence.find_first_of('-') != string::npos){ // if there are any gaps in the sequence, assume that it is - setAligned(sequence); // an alignment file + catch(exception& e) { + errorOut(e, "Sequence", "getSequenceString"); + exit(1); + } +} +//******************************************************************************************************************** +//comment can contain '>' so we need to account for that +string Sequence::getCommentString(ifstream& fastaFile) { + try { + char letter; + string sequence = ""; + + while(fastaFile){ + letter=fastaFile.get(); + if((letter == '\r') || (letter == '\n')){ + gobble(fastaFile); //in case its a \r\n situation + break; + } + } + + return sequence; + } + catch(exception& e) { + errorOut(e, "Sequence", "getCommentString"); + exit(1); } - setUnaligned(sequence); // also set the unaligned sequence file } //******************************************************************************************************************** @@ -91,7 +138,7 @@ void Sequence::setName(string seqName) { void Sequence::setUnaligned(string sequence){ - if(sequence.find_first_of('-') != string::npos) { + if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) { string temp = ""; for(int j=0;j=0;j--){ if(aligned[j] != '.'){ - endPos = j; + endPos = j + 1; break; } } } + if(isAligned == 0){ endPos = numBases; } + return endPos; } @@ -278,3 +330,20 @@ bool Sequence::getIsAligned(){ } //******************************************************************************************************************** + +void Sequence::reverseComplement(){ + + string temp; + for(int i=numBases-1;i>=0;i--){ + if(unaligned[i] == 'A') { temp += 'T'; } + else if(unaligned[i] == 'T'){ temp += 'A'; } + else if(unaligned[i] == 'G'){ temp += 'C'; } + else if(unaligned[i] == 'C'){ temp += 'G'; } + else { temp += 'N'; } + } + unaligned = temp; + aligned = temp; + +} + +//********************************************************************************************************************