]> git.donarmstrong.com Git - mothur.git/blobdiff - sequence.cpp
added otu.association command. added calcSpearman, calcKendall and calcPearson functi...
[mothur.git] / sequence.cpp
index 78fa266105c6be21ce5442250ef8c00f389ab078..090ee14c1c838098aaaeb1a831ec01e3272ed512 100644 (file)
@@ -76,10 +76,15 @@ Sequence::Sequence(istringstream& fastaString){
                        
                        while (!fastaString.eof())      {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaString);              
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaString, numAmbig);
+                       
                        setAligned(sequence);   
                        //setUnaligned removes any gap characters for us                                                
-                       setUnaligned(sequence);         
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+               
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
                
        }
@@ -118,10 +123,14 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                        
                        while (!fastaString.eof())      {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaString);              
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaString, numAmbig);
                        
                        //setUnaligned removes any gap characters for us                                                
-                       setUnaligned(sequence);         
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
                
        }
@@ -139,7 +148,7 @@ Sequence::Sequence(ifstream& fastaFile){
                m = MothurOut::getInstance();
                initialize();
                fastaFile >> name;
-
+               
                if (name.length() != 0) { 
                
                        name = name.substr(1); 
@@ -163,11 +172,15 @@ Sequence::Sequence(ifstream& fastaFile){
                        //read real sequence
                        while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){  break;      }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaFile);                
-       
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaFile, numAmbig);
+                       
                        setAligned(sequence);   
                        //setUnaligned removes any gap characters for us                                                
                        setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
 
        }
@@ -205,10 +218,14 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                        //read real sequence
                        while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){       break; }       } // get rest of line if there's any crap there
                        
-                       sequence = getSequenceString(fastaFile);                
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaFile, numAmbig);
                        
                        //setUnaligned removes any gap characters for us                                                
                        setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
                }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
                
        }
@@ -219,10 +236,11 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
 }
 
 //********************************************************************************************************************
-string Sequence::getSequenceString(ifstream& fastaFile) {
+string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
        try {
                char letter;
                string sequence = "";   
+               numAmbig = 0;
                
                while(fastaFile){
                        letter= fastaFile.get();
@@ -233,6 +251,10 @@ string Sequence::getSequenceString(ifstream& fastaFile) {
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
+                               if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G'  && letter != 'C' && letter != 'N'){
+                                       letter = 'N';
+                                       numAmbig++;
+                               }
                                sequence += letter;
                        }
                }
@@ -267,10 +289,11 @@ string Sequence::getCommentString(ifstream& fastaFile) {
        }
 }
 //********************************************************************************************************************
-string Sequence::getSequenceString(istringstream& fastaFile) {
+string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) {
        try {
                char letter;
-               string sequence = "";   
+               string sequence = "";
+               numAmbig = 0;
                
                while(!fastaFile.eof()){
                        letter= fastaFile.get();
@@ -282,6 +305,10 @@ string Sequence::getSequenceString(istringstream& fastaFile) {
                        else if(isprint(letter)){
                                letter = toupper(letter);
                                if(letter == 'U'){letter = 'T';}
+                               if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G'  && letter != 'C' && letter != 'N'){
+                                       letter = 'N';
+                                       numAmbig++;
+                               }
                                sequence += letter;
                        }
                }
@@ -427,6 +454,13 @@ string Sequence::getAligned(){
        else                            {  return aligned;  }
 }
 
+//********************************************************************************************************************
+
+string Sequence::getInlineSeq(){
+       return name + '\t' + aligned;   
+}
+
+
 //********************************************************************************************************************
 
 string Sequence::getPairwise(){
@@ -514,7 +548,7 @@ int Sequence::getLongHomoPolymer(){
 //********************************************************************************************************************
 
 int Sequence::getStartPos(){
-       if(endPos == -1){
+       if(startPos == -1){
                for(int j = 0; j < alignmentLength; j++) {
                        if(aligned[j] != '.'){
                                startPos = j + 1;
@@ -529,6 +563,17 @@ int Sequence::getStartPos(){
 
 //********************************************************************************************************************
 
+void Sequence::padToPos(int start){
+
+       for(int j = startPos-1; j < start-1; j++) {
+               aligned[j] = '.';
+       }
+       startPos = start;
+
+}
+
+//********************************************************************************************************************
+
 int Sequence::getEndPos(){
        if(endPos == -1){
                for(int j=alignmentLength-1;j>=0;j--){
@@ -545,10 +590,20 @@ int Sequence::getEndPos(){
 
 //********************************************************************************************************************
 
+void Sequence::padFromPos(int end){
+       
+       for(int j = end; j < endPos; j++) {
+               aligned[j] = '.';
+       }
+       endPos = end;
+       
+}
+
+//********************************************************************************************************************
+
 bool Sequence::getIsAligned(){
        return isAligned;
 }
-
 //********************************************************************************************************************
 
 void Sequence::reverseComplement(){
@@ -565,4 +620,16 @@ void Sequence::reverseComplement(){
        aligned = temp;
        
 }
-/**************************************************************************************************/
+
+//********************************************************************************************************************
+
+void Sequence::trim(int length){
+       
+       if(numBases > length){
+               unaligned = unaligned.substr(0,length);
+               numBases = length;
+       }
+       
+}
+
+///**************************************************************************************************/