]> git.donarmstrong.com Git - mothur.git/blobdiff - nast.cpp
working on pam
[mothur.git] / nast.cpp
index 4aed5903aec4985f92d5668d39301cabe3d779e6..647e0e4f1e17e205c183c78615a966b7a98545cc 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
 
 Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) {
        try {
+               m = MothurOut::getInstance();
                maxInsertLength = 0;
+       
                pairwiseAlignSeqs();    //      This is part A in Fig. 2 of DeSantis et al.
                regapSequences();               //      This is parts B-F in Fig. 2 of DeSantis et al.
-
+               
        }
        catch(exception& e) {
-               errorOut(e, "Nast", "Nast");
+               m->errorOut(e, "Nast", "Nast");
                exit(1);
        }
-
 }
 
 /**************************************************************************************************/
 
 void Nast::pairwiseAlignSeqs(){        //      Here we call one of the pairwise alignment methods to align our unaligned candidate
                                                                //      and template sequences
-       try {
-               
+       try {   
                alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-       
+
                string candAln = alignment->getSeqAAln();
                string tempAln = alignment->getSeqBAln();
-
+       
                if(candAln == ""){
 
                        candidateSeq->setPairwise("");
@@ -53,7 +53,6 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
 
                }
                else{
-
                        if(tempAln[0] == '-'){
                                int pairwiseAlignmentLength = tempAln.length(); //      we need to make sure that the candidate sequence alignment
                                for(int i=0;i<pairwiseAlignmentLength;i++){             //      starts where the template sequence alignment starts, if it
@@ -64,7 +63,6 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
                                        }
                                }
                        }
-                       
                        int pairwiseAlignmentLength = tempAln.length();
                        if(tempAln[pairwiseAlignmentLength-1] == '-'){          //      we need to make sure that the candidate sequence alignment
                                for(int i=pairwiseAlignmentLength-1; i>=0; i--){//      ends where the template sequence alignment ends, if it runs
@@ -83,7 +81,7 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
 
        }
        catch(exception& e) {
-               errorOut(e, "Nast", "pairwiseAlignSeqs");
+               m->errorOut(e, "Nast", "pairwiseAlignSeqs");
                exit(1);
        }       
 }
@@ -94,27 +92,26 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
 
 //     here we do steps C-F of Fig. 2 from DeSantis et al.
        try {
-       
-               int longAlignmentLength = newTemplateAlign.length();    
                
+               int longAlignmentLength = newTemplateAlign.length();    
+       
                for(int i=0; i<longAlignmentLength; i++){                               //      use the long alignment as the standard
                        int rightIndex, rightRoom, leftIndex, leftRoom;
-                       
+       
                        //      Part C of Fig. 2 from DeSantis et al.
                        if((isalpha(newTemplateAlign[i]) != isalpha(tempAln[i]))){      //if there is a discrepancy between the regapped
                                
                                rightRoom = 0; leftRoom = 0;
                                
                                //      Part D of Fig. 2 from DeSantis et al.           //      template sequence and the official template sequence
-                               for(leftIndex=i-1;leftIndex>0;leftIndex--){     //      then we've got problems...
+                               for(leftIndex=i-1;leftIndex>0;leftIndex--){             //      then we've got problems...
                                        if(!isalpha(candAln[leftIndex])){
                                                leftRoom = 1;   //count how far it is to the nearest gap on the LEFT side of the anomaly
                                                while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom]))   {       leftRoom++;             }
                                                break;
                                        }
                                }
-                               
-                               
+
                                for(rightIndex=i+1;rightIndex<longAlignmentLength-1;rightIndex++){
                                        if(!isalpha(candAln[rightIndex])){
                                                rightRoom = 1;  //count how far it is to the nearest gap on the RIGHT side of the anomaly
@@ -122,99 +119,109 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
                                                break;
                                        }
                                }
-                                       
+                                                               
                                int insertLength = 0;                                                   //      figure out how long the anomaly is
                                while(!isalpha(newTemplateAlign[i + insertLength]))     {       insertLength++; }
                                if(insertLength > maxInsertLength){     maxInsertLength = insertLength; }
-                                       
+               
                                if((leftRoom + rightRoom) >= insertLength){
        
                                        //      Parts D & E from Fig. 2 of DeSantis et al.
                                        if((i-leftIndex) <= (rightIndex-i)){            //      the left gap is closer - > move stuff left there's
        
                                                if(leftRoom >= insertLength){                   //      enough room to the left to move
-       
+                       //cout << "lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl;
                                                        string leftTemplateString = newTemplateAlign.substr(0,i);
-                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       string rightTemplateString = newTemplateAlign.substr((i+insertLength));
                                                        newTemplateAlign = leftTemplateString + rightTemplateString;
                                                        longAlignmentLength = newTemplateAlign.length();
-                                                       
-                                                       string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1);
-                                                       string rightCandidateString = candAln.substr(leftIndex+1);
+                       //cout << "lr candAln = " << candAln.length() << '\t' << leftIndex << '\t'  << endl;                            
+                                                       string leftCandidateString = candAln.substr(0,(leftIndex-insertLength+1));
+                                                       string rightCandidateString = candAln.substr((leftIndex+1));
                                                        candAln = leftCandidateString + rightCandidateString;
-               
-                                               }
-                                               else{                                                                   //      not enough room to the left, have to steal some space to
-                                                       string leftTemplateString = newTemplateAlign.substr(0,i);       //      the right
-                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       
+                                               }else{                                                                  //      not enough room to the left, have to steal some space to the right
+                                               
+                       //cout << "in else lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl;
+                                                       string leftTemplateString = newTemplateAlign.substr(0,i);       
+                                                       string rightTemplateString = newTemplateAlign.substr((i+insertLength));
                                                        newTemplateAlign = leftTemplateString + rightTemplateString;
                                                        longAlignmentLength = newTemplateAlign.length();
-                                                       
-                                                       string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
-                                                       string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
-                                                       string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom));
+                       //cout << " in else lr candAln = " << candAln.length() << '\t' << " leftIndex = " << leftIndex << " leftroom = " << leftRoom << " rightIndex = " << rightIndex << '\t' << " rightroom = " << rightRoom << '\t' << endl;                                 
+                                                       string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1));
+                                                       string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1));
+                                                       string rightCandidateString = candAln.substr((rightIndex+(insertLength-leftRoom)));
                                                        candAln = leftCandidateString + insertString + rightCandidateString;
                                
                                                }
-                                       }
-                                       else{                                                                           //      the right gap is closer - > move stuff right there's
+                                       }else{                                                                          //      the right gap is closer - > move stuff right there's
                                                if(rightRoom >= insertLength){                  //      enough room to the right to move
-                       
+                       //cout << "rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl;
                                                        string leftTemplateString = newTemplateAlign.substr(0,i);
-                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       string rightTemplateString = newTemplateAlign.substr((i+insertLength));
                                                        newTemplateAlign = leftTemplateString + rightTemplateString;
                                                        longAlignmentLength = newTemplateAlign.length();
-                                                       
+                       //cout << "rr candAln = " << candAln.length() << '\t' << i << '\t' << rightIndex << '\t' << rightIndex+insertLength << endl;                            
                                                        string leftCandidateString = candAln.substr(0,rightIndex);
-                                                       string rightCandidateString = candAln.substr(rightIndex+insertLength);
+                                                       string rightCandidateString = candAln.substr((rightIndex+insertLength));
                                                        candAln = leftCandidateString + rightCandidateString;   
                                                                        
                                                }
-                                               else{                                                                   //      not enough room to the right, have to steal some 
+                                               else{                                                                   //      not enough room to the right, have to steal some        
                                                        //      space to the left lets move left and then right...
+                                       //cout << "in else rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl;
                                                        string leftTemplateString = newTemplateAlign.substr(0,i);
-                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       string rightTemplateString = newTemplateAlign.substr((i+insertLength));
                                                        newTemplateAlign = leftTemplateString + rightTemplateString;
                                                        longAlignmentLength = newTemplateAlign.length();
-                                                                       
-                                                       string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1);
-                                                       string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
-                                                       string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+                                       //cout << "in else rr candAln = " << candAln.length() << '\t' << '\t' << (leftIndex-(insertLength-rightRoom)+1) << '\t' <<  (leftIndex+1,rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl;                             
+                                                       string leftCandidateString = candAln.substr(0,(leftIndex-(insertLength-rightRoom)+1));
+                                                       string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1));
+                                                       string rightCandidateString = candAln.substr((rightIndex+rightRoom));
                                                        candAln = leftCandidateString + insertString + rightCandidateString;    
                                                                        
                                                }
                                        }
+                                       
+                                       if ((i - insertLength) < 0) {  i = 0; }
+                                       else { i -= insertLength; }
+
                                }
                                else{
-                                                                                                       //      there could be a case where there isn't enough room in
-                                       string leftTemplateString = newTemplateAlign.substr(0,i);                       //      either direction to move stuff
-                                       string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
+                       //      there could be a case where there isn't enough room in either direction to move stuff
+//cout << "in else else newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << (i+leftRoom+rightRoom) << endl;
+                                       string leftTemplateString = newTemplateAlign.substr(0,i);       
+                                       string rightTemplateString = newTemplateAlign.substr((i+leftRoom+rightRoom));
                                        newTemplateAlign = leftTemplateString + rightTemplateString;
                                        longAlignmentLength = newTemplateAlign.length();
                                                        
-                                       string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
-                                       string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
-                                       string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+               //cout << "in else else newTemplateAlign = " << candAln.length() << '\t' << (leftIndex-leftRoom+1) << '\t' << (leftIndex+1) << '\t' << (rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl;      
+                                       string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1));
+                                       string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1));
+                                       string rightCandidateString = candAln.substr((rightIndex+rightRoom));
                                        candAln = leftCandidateString + insertString + rightCandidateString;
-       
+                                       
+                                       i -= (leftRoom + rightRoom);
                                }
+                       
+//                             i -= insertLength;
                                
-                               i -= insertLength;
+                               //if i is negative, we want to remove the extra gaps to the right
+                               if (i < 0) { m->mothurOut("i is negative"); m->mothurOutEndLine(); } 
                        } 
                }
        }
        catch(exception& e) {
-               errorOut(e, "Nast", "removeExtraGaps");
+               m->errorOut(e, "Nast", "removeExtraGaps");
                exit(1);
        }       
-       
 }
 
 /**************************************************************************************************/
 
 void Nast::regapSequences(){   //This is essentially part B in Fig 2. of DeSantis et al.
        try { 
-       
+       //cout << candidateSeq->getName() << endl;
                string candPair = candidateSeq->getPairwise();
                string candAln = "";
                
@@ -229,7 +236,7 @@ void Nast::regapSequences(){        //This is essentially part B in Fig 2. of DeSantis
                        candidateSeq->setAligned(candAln);
                        return;
                }
-               
+       
                int fullAlignIndex = 0;
                int pairwiseAlignIndex = 0;
                string newTemplateAlign = "";                                   //      this is going to be messy so we want a temporary template
@@ -239,10 +246,11 @@ void Nast::regapSequences(){      //This is essentially part B in Fig 2. of DeSantis
                        newTemplateAlign += tempAln[fullAlignIndex];//  pairwise sequences
                        fullAlignIndex++;
                }
-               
+
                string lastLoop = "";
                
                while(pairwiseAlignIndex<pairwiseLength){
+       //cout << pairwiseAlignIndex << '\t' << fullAlignIndex << '\t' << pairwiseLength << endl;
                        if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
                           && isalpha(candPair[pairwiseAlignIndex])){
                                //  the template and candidate pairwise and template aligned have characters
@@ -308,14 +316,14 @@ void Nast::regapSequences(){      //This is essentially part B in Fig 2. of DeSantis
                                //      would skip the gaps and not progress through full alignment sequence
                                //      not tested yet
                                
-                               mothurOut("We're into D " + toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); mothurOutEndLine();
+                               m->mothurOut("We're into D " + toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); m->mothurOutEndLine();
                                pairwiseAlignIndex++;
                        }
                        else{
                                //      everything has a gap - not possible
                                //      not tested yet
                                
-                               mothurOut("We're into F " +  toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); mothurOutEndLine();
+                               m->mothurOut("We're into F " +  toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); m->mothurOutEndLine();
                                pairwiseAlignIndex++;
                                fullAlignIndex++;                       
                        }               
@@ -326,7 +334,9 @@ void Nast::regapSequences(){        //This is essentially part B in Fig 2. of DeSantis
                        newTemplateAlign += tempAln[i];//
                }
                
-               int start, end;
+               int start = 0;
+               int end = candAln.length()-1;
+
                for(int i=0;i<candAln.length();i++){
                        if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }       //      if we padded the alignemnt from
                        else{                   start = i;                      break;          }                                                       //      blast with Z's, change them to
@@ -341,16 +351,16 @@ void Nast::regapSequences(){      //This is essentially part B in Fig 2. of DeSantis
                        candAln[i] = toupper(candAln[i]);                       //      everything is upper case
                }
                
-               
+
                if(candAln.length() != tempAln.length()){               //      if the regapped candidate sequence is longer than the official
                        removeExtraGaps(candAln, tempAln, newTemplateAlign);//  template alignment then we need to do steps C-F in Fig.
                }                                                                                               //      2 of Desantis et al.
-               
-               
+
                candidateSeq->setAligned(candAln);
+       //cout << "here" << endl;
        }
        catch(exception& e) {
-               errorOut(e, "Nast", "regapSequences");
+               m->errorOut(e, "Nast", "regapSequences");
                exit(1);
        }       
 }
@@ -385,7 +395,7 @@ float Nast::getSimilarityScore(){
                
        }
        catch(exception& e) {
-               errorOut(e, "Nast", "getSimilarityScore");
+               m->errorOut(e, "Nast", "getSimilarityScore");
                exit(1);
        }       
 }