]> git.donarmstrong.com Git - mothur.git/blobdiff - sequence.cpp
added filter.shared command. fixed lci and uci for thetayc calc
[mothur.git] / sequence.cpp
index 9cdbfb91cf6ba148dbf4e2f5b4252b33604c906c..96662bc36d4b77e1cb8b7a884f4e8460969033f4 100644 (file)
@@ -191,6 +191,59 @@ Sequence::Sequence(ifstream& fastaFile){
 }
 //********************************************************************************************************************
 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
+Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){ // Builds a Sequence from the next record in fastaFile; the text following the name on the header line is returned through extraInfo. NOTE(review): getInfo is never read in this body.
+       try {
+               m = MothurOut::getInstance(); // handle used below for warnings and error reporting
+               initialize();
+               fastaFile >> name; // first whitespace-delimited token of the header line
+        extraInfo = ""; // always reset the out-parameter, even when the name turns out to be blank
+               
+               if (name.length() != 0) { 
+            
+                       name = name.substr(1); // drop the leading character (presumably the '>' record marker)
+                       
+                       string sequence;
+            
+                       //skip commented-out records: a name that starts with '#' (after the leading character was dropped) marks one
+                       while ((name[0] == '#') && fastaFile) { 
+                               while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // discard the rest of the header line; stops at LF (10), CR (13) or end of file
+                               sequence = getCommentString(fastaFile); // presumably consumes the commented-out sequence body; the value is overwritten before it is used
+                               
+                               if (fastaFile) {  
+                                       fastaFile >> name;  // header token of the next record
+                                       name = name.substr(1);  // NOTE(review): throws std::out_of_range if the extraction above left name empty (e.g. only whitespace remained) -- confirm
+                               }else { 
+                                       name = ""; // stream is no longer good: leave a blank name and stop skipping
+                                       break;
+                               }
+                       }
+                       
+                       //read info after sequence name: the remainder of the header line is copied character by character into extraInfo
+                       while (!fastaFile.eof())        {       
+                char c = fastaFile.get(); 
+                if (c == 10 || c == 13){  break;       }       // LF or CR ends the header line
+                extraInfo += c; // NOTE(review): when get() hits end of file its EOF result is still appended here before the loop condition is rechecked
+            } 
+                       
+                       int numAmbig = 0; // passed by reference to getSequenceString, which fills it in
+                       sequence = getSequenceString(fastaFile, numAmbig);
+                       
+                       setAligned(sequence);   
+                       //setUnaligned removes any gap characters for us                                                
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); } // NOTE(review): floating-point division by zero if numBases is 0 -- confirm numBases is set by the calls above
+                       
+               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); } // no header token could be read: report the stream position and leave the object as initialize() set it
+        
+       }
+       catch(exception& e) { // any std::exception raised above is reported and terminates the process
+               m->errorOut(e, "Sequence", "Sequence");
+               exit(1);
+       }                                                       
+}
+//********************************************************************************************************************
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
 Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
        try {
                m = MothurOut::getInstance();
@@ -247,7 +300,7 @@ string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
                        if(letter == '>'){
                                fastaFile.putback(letter);
                                break;
-                       }
+                       }else if (letter == ' ') {;}
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
@@ -301,7 +354,7 @@ string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) {
                        if(letter == '>'){
                                fastaFile.putback(letter);
                                break;
-                       }
+                       }else if (letter == ' ') {;}
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
@@ -550,7 +603,7 @@ int Sequence::getLongHomoPolymer(){
 int Sequence::getStartPos(){
        if(startPos == -1){
                for(int j = 0; j < alignmentLength; j++) {
-                       if(aligned[j] != '.'){
+                       if((aligned[j] != '.')&&(aligned[j] != '-')){
                                startPos = j + 1;
                                break;
                        }
@@ -615,7 +668,7 @@ int Sequence::filterFromPos(int end){
 int Sequence::getEndPos(){
        if(endPos == -1){
                for(int j=alignmentLength-1;j>=0;j--){
-                       if(aligned[j] != '.'){
+                       if((aligned[j] != '.')&&(aligned[j] != '-')){
                                endPos = j + 1;
                                break;
                        }
@@ -629,7 +682,7 @@ int Sequence::getEndPos(){
 //********************************************************************************************************************
 
 void Sequence::padFromPos(int end){
-       cout << end << '\t' << endPos << endl;
+       //cout << end << '\t' << endPos << endl;
        for(int j = end; j < endPos; j++) {
                aligned[j] = '.';
        }