X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=nast.cpp;h=647e0e4f1e17e205c183c78615a966b7a98545cc;hp=0741ccd7ad444a250fa1dec5b23790771142a99c;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=510b1cfc25cd79391d6973ca20c5ec25fb1bb3b2 diff --git a/nast.cpp b/nast.cpp index 0741ccd..647e0e4 100644 --- a/nast.cpp +++ b/nast.cpp @@ -22,50 +22,68 @@ /**************************************************************************************************/ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) { - maxInsertLength = 0; - pairwiseAlignSeqs(); // This is part A in Fig. 2 of DeSantis et al. - regapSequences(); // This is parts B-F in Fig. 2 of DeSantis et al. + try { + m = MothurOut::getInstance(); + maxInsertLength = 0; + + pairwiseAlignSeqs(); // This is part A in Fig. 2 of DeSantis et al. + regapSequences(); // This is parts B-F in Fig. 2 of DeSantis et al. + + } + catch(exception& e) { + m->errorOut(e, "Nast", "Nast"); + exit(1); + } } /**************************************************************************************************/ void Nast::pairwiseAlignSeqs(){ // Here we call one of the pairwise alignment methods to align our unaligned candidate // and template sequences - alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned()); + try { + alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned()); + + string candAln = alignment->getSeqAAln(); + string tempAln = alignment->getSeqBAln(); - string candAln = alignment->getSeqAAln(); - string tempAln = alignment->getSeqBAln(); + if(candAln == ""){ - if(candAln == ""){ - candidateSeq->setPairwise(""); - templateSeq->setPairwise(templateSeq->getUnaligned()); - } - else{ - if(tempAln[0] == '-'){ - int pairwiseAlignmentLength = tempAln.length(); // we need to make sure that the candidate sequence alignment - for(int i=0;isetPairwise(""); + templateSeq->setPairwise(templateSeq->getUnaligned()); + + } + else{ + if(tempAln[0] == '-'){ + int pairwiseAlignmentLength = tempAln.length(); // we need to make sure that the candidate sequence alignment + for(int i=0;i=0; i--){// ends where the template sequence alignment ends, if it runs - if(isalpha(tempAln[i])){ // long, we nuke the end of the candidate sequence - candAln = candAln.substr(0,i+1); - tempAln = tempAln.substr(0,i+1); - break; - } + int pairwiseAlignmentLength = tempAln.length(); + if(tempAln[pairwiseAlignmentLength-1] == '-'){ // we need to make sure that the candidate sequence alignment + for(int i=pairwiseAlignmentLength-1; i>=0; i--){// ends where the template sequence alignment ends, if it runs + if(isalpha(tempAln[i])){ // long, we nuke the end of the candidate sequence + candAln = candAln.substr(0,i+1); + tempAln = tempAln.substr(0,i+1); + break; + } + } } + } + + candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for + templateSeq->setPairwise(tempAln); // the candidate and template sequences + } - candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for - templateSeq->setPairwise(tempAln); // the candidate and template sequences - + catch(exception& e) { + m->errorOut(e, "Nast", "pairwiseAlignSeqs"); + exit(1); + } } /**************************************************************************************************/ @@ -73,274 +91,313 @@ void Nast::pairwiseAlignSeqs(){ // Here we call one of the pairwise alignment me void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAlign){ // here we do steps C-F of Fig. 2 from DeSantis et al. + try { + + int longAlignmentLength = newTemplateAlign.length(); - int longAlignmentLength = newTemplateAlign.length(); - - for(int i=0; i=0;leftIndex--){ // then we've got problems... - if(!isalpha(candAln[leftIndex])){ - leftRoom = 1; //count how far it is to the nearest gap on the LEFT side of the anomaly - while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom])) { leftRoom++; } - break; + for(int i=0; i0;leftIndex--){ // then we've got problems... + if(!isalpha(candAln[leftIndex])){ + leftRoom = 1; //count how far it is to the nearest gap on the LEFT side of the anomaly + while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom])) { leftRoom++; } + break; + } } - } - - for(rightIndex=i+1;rightIndex maxInsertLength){ maxInsertLength = insertLength; } - - if((leftRoom + rightRoom) >= insertLength){ - // Parts D & E from Fig. 2 of DeSantis et al. - if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's - if(leftRoom >= insertLength){ // enough room to the left to move - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+insertLength); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1); - string rightCandidateString = candAln.substr(leftIndex+1); - candAln = leftCandidateString + rightCandidateString; - } - else{ // not enough room to the left, have to steal some space to - string leftTemplateString = newTemplateAlign.substr(0,i); // the right - string rightTemplateString = newTemplateAlign.substr(i+insertLength); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1); - string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1); - string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom)); - candAln = leftCandidateString + insertString + rightCandidateString; + for(rightIndex=i+1;rightIndex move stuff right there's - if(rightRoom >= insertLength){ // enough room to the right to move - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+insertLength); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,rightIndex); - string rightCandidateString = candAln.substr(rightIndex+insertLength); - candAln = leftCandidateString + rightCandidateString; - - } - else{ // not enough room to the right, have to steal some - // space to the left lets move left and then right... - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+insertLength); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1); - string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1); - string rightCandidateString = candAln.substr(rightIndex+rightRoom); - candAln = leftCandidateString + insertString + rightCandidateString; + + int insertLength = 0; // figure out how long the anomaly is + while(!isalpha(newTemplateAlign[i + insertLength])) { insertLength++; } + if(insertLength > maxInsertLength){ maxInsertLength = insertLength; } + + if((leftRoom + rightRoom) >= insertLength){ + + // Parts D & E from Fig. 2 of DeSantis et al. + if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's + + if(leftRoom >= insertLength){ // enough room to the left to move + //cout << "lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl; + string leftTemplateString = newTemplateAlign.substr(0,i); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); + newTemplateAlign = leftTemplateString + rightTemplateString; + longAlignmentLength = newTemplateAlign.length(); + //cout << "lr candAln = " << candAln.length() << '\t' << leftIndex << '\t' << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-insertLength+1)); + string rightCandidateString = candAln.substr((leftIndex+1)); + candAln = leftCandidateString + rightCandidateString; + + }else{ // not enough room to the left, have to steal some space to the right + //cout << "in else lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl; + string leftTemplateString = newTemplateAlign.substr(0,i); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); + newTemplateAlign = leftTemplateString + rightTemplateString; + longAlignmentLength = newTemplateAlign.length(); + //cout << " in else lr candAln = " << candAln.length() << '\t' << " leftIndex = " << leftIndex << " leftroom = " << leftRoom << " rightIndex = " << rightIndex << '\t' << " rightroom = " << rightRoom << '\t' << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); + string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); + string rightCandidateString = candAln.substr((rightIndex+(insertLength-leftRoom))); + candAln = leftCandidateString + insertString + rightCandidateString; + + } + }else{ // the right gap is closer - > move stuff right there's + if(rightRoom >= insertLength){ // enough room to the right to move + //cout << "rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; + string leftTemplateString = newTemplateAlign.substr(0,i); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); + newTemplateAlign = leftTemplateString + rightTemplateString; + longAlignmentLength = newTemplateAlign.length(); + //cout << "rr candAln = " << candAln.length() << '\t' << i << '\t' << rightIndex << '\t' << rightIndex+insertLength << endl; + string leftCandidateString = candAln.substr(0,rightIndex); + string rightCandidateString = candAln.substr((rightIndex+insertLength)); + candAln = leftCandidateString + rightCandidateString; + + } + else{ // not enough room to the right, have to steal some + // space to the left lets move left and then right... + //cout << "in else rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; + string leftTemplateString = newTemplateAlign.substr(0,i); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); + newTemplateAlign = leftTemplateString + rightTemplateString; + longAlignmentLength = newTemplateAlign.length(); + //cout << "in else rr candAln = " << candAln.length() << '\t' << '\t' << (leftIndex-(insertLength-rightRoom)+1) << '\t' << (leftIndex+1,rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-(insertLength-rightRoom)+1)); + string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); + string rightCandidateString = candAln.substr((rightIndex+rightRoom)); + candAln = leftCandidateString + insertString + rightCandidateString; + + } } + + if ((i - insertLength) < 0) { i = 0; } + else { i -= insertLength; } + + } + else{ + // there could be a case where there isn't enough room in either direction to move stuff +//cout << "in else else newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << (i+leftRoom+rightRoom) << endl; + string leftTemplateString = newTemplateAlign.substr(0,i); + string rightTemplateString = newTemplateAlign.substr((i+leftRoom+rightRoom)); + newTemplateAlign = leftTemplateString + rightTemplateString; + longAlignmentLength = newTemplateAlign.length(); + + //cout << "in else else newTemplateAlign = " << candAln.length() << '\t' << (leftIndex-leftRoom+1) << '\t' << (leftIndex+1) << '\t' << (rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); + string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); + string rightCandidateString = candAln.substr((rightIndex+rightRoom)); + candAln = leftCandidateString + insertString + rightCandidateString; + + i -= (leftRoom + rightRoom); } - } - else{ // there could be a case where there isn't enough room in - string leftTemplateString = newTemplateAlign.substr(0,i); // either direction to move stuff - string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1); - string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1); - string rightCandidateString = candAln.substr(rightIndex+rightRoom); - candAln = leftCandidateString + insertString + rightCandidateString; - } - i -= insertLength; - } +// i -= insertLength; + + //if i is negative, we want to remove the extra gaps to the right + if (i < 0) { m->mothurOut("i is negative"); m->mothurOutEndLine(); } + } + } } - + catch(exception& e) { + m->errorOut(e, "Nast", "removeExtraGaps"); + exit(1); + } } /**************************************************************************************************/ void Nast::regapSequences(){ //This is essentially part B in Fig 2. of DeSantis et al. - - string candPair = candidateSeq->getPairwise(); - string candAln = ""; - - string tempPair = templateSeq->getPairwise(); - string tempAln = templateSeq->getAligned(); // we use the template aligned sequence as our guide - - int pairwiseLength = candPair.length(); - int fullAlignLength = tempAln.length(); - - if(candPair == ""){ - for(int i=0;isetAligned(candAln); - return; - } - - int fullAlignIndex = 0; - int pairwiseAlignIndex = 0; - string newTemplateAlign = ""; // this is going to be messy so we want a temporary template - // alignment string - while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex] == '-'){ - candAln += '.'; // add the initial '-' and '.' to the candidate and template - newTemplateAlign += tempAln[fullAlignIndex];// pairwise sequences - fullAlignIndex++; - } - - string lastLoop = ""; - - while(pairwiseAlignIndexgetName() << endl; + string candPair = candidateSeq->getPairwise(); + string candAln = ""; + + string tempPair = templateSeq->getPairwise(); + string tempAln = templateSeq->getAligned(); // we use the template aligned sequence as our guide + + int pairwiseLength = candPair.length(); + int fullAlignLength = tempAln.length(); + + if(candPair == ""){ + for(int i=0;isetAligned(candAln); + return; } - else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex]) - && isalpha(candPair[pairwiseAlignIndex])){ - // the template pairwise and candidate pairwise are characters and the template aligned is a gap - // need to insert gaps into the candidateSeq.aligned sequence - - candAln += '-'; - newTemplateAlign += '-';// + + int fullAlignIndex = 0; + int pairwiseAlignIndex = 0; + string newTemplateAlign = ""; // this is going to be messy so we want a temporary template + // alignment string + while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex] == '-'){ + candAln += '.'; // add the initial '-' and '.' to the candidate and template + newTemplateAlign += tempAln[fullAlignIndex];// pairwise sequences fullAlignIndex++; } - else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex]) - && isalpha(candPair[pairwiseAlignIndex])){ - // the template pairwise is a gap and the template aligned and pairwise sequences have characters - // this is the alpha scenario. add character to the candidateSeq.aligned sequence without progressing - // further through the tempAln sequence. - - candAln += candPair[pairwiseAlignIndex]; - newTemplateAlign += '-';// - pairwiseAlignIndex++; - } - else if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex]) - && !isalpha(candPair[pairwiseAlignIndex])){ - // the template pairwise and full alignment are characters and the candidate sequence has a gap - // should not be a big deal, just add the gap position to the candidateSeq.aligned sequence; - - candAln += candPair[pairwiseAlignIndex]; - newTemplateAlign += tempAln[fullAlignIndex];// - fullAlignIndex++; - pairwiseAlignIndex++; + + string lastLoop = ""; + + while(pairwiseAlignIndexseems to be the opposite of the alpha scenario + + candAln += candPair[pairwiseAlignIndex]; + newTemplateAlign += tempAln[fullAlignIndex];// + pairwiseAlignIndex++; + fullAlignIndex++; + } + else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex]) + && !isalpha(candPair[pairwiseAlignIndex])){ + // template pairwise has a character, but its full aligned sequence and candidate sequence have gaps + // this would happen like we need to add a gap. basically the opposite of the alpha situation + + newTemplateAlign += tempAln[fullAlignIndex];// + candAln += "-"; + fullAlignIndex++; + } + else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex]) + && !isalpha(candPair[pairwiseAlignIndex])){ + // template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible + // would skip the gaps and not progress through full alignment sequence + // not tested yet + + m->mothurOut("We're into D " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); m->mothurOutEndLine(); + pairwiseAlignIndex++; + } + else{ + // everything has a gap - not possible + // not tested yet + + m->mothurOut("We're into F " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); m->mothurOutEndLine(); + pairwiseAlignIndex++; + fullAlignIndex++; + } } - else if(!isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex]) - && isalpha(candPair[pairwiseAlignIndex])){ - // the template pairwise and aligned are gaps while the candidate pairwise has a character - // this would be an insertion, go ahead and add the character->seems to be the opposite of the alpha scenario - - candAln += candPair[pairwiseAlignIndex]; - newTemplateAlign += tempAln[fullAlignIndex];// - pairwiseAlignIndex++; - fullAlignIndex++; + + for(int i=fullAlignIndex;i=0;i--){ // ditto. + if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } + else{ end = i; break; } } - else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex]) - && !isalpha(candPair[pairwiseAlignIndex])){ - // template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible - // would skip the gaps and not progress through full alignment sequence - // not tested yet - - mothurOut("We're into D " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); mothurOutEndLine(); - pairwiseAlignIndex++; + + for(int i=start;i<=end;i++){ // go through the candidate alignment sequence and make sure that + candAln[i] = toupper(candAln[i]); // everything is upper case } - else{ - // everything has a gap - not possible - // not tested yet - - mothurOut("We're into F " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); mothurOutEndLine(); - pairwiseAlignIndex++; - fullAlignIndex++; - } - } - - for(int i=fullAlignIndex;i=0;i--){ // ditto. - if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } - else{ end = i; break; } - } - - for(int i=start;i<=end;i++){ // go through the candidate alignment sequence and make sure that - candAln[i] = toupper(candAln[i]); // everything is upper case - } + + if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official + removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig. + } // 2 of Desantis et al. - if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official - removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig. - } // 2 of Desantis et al. - - - candidateSeq->setAligned(candAln); + candidateSeq->setAligned(candAln); + //cout << "here" << endl; + } + catch(exception& e) { + m->errorOut(e, "Nast", "regapSequences"); + exit(1); + } } /**************************************************************************************************/ float Nast::getSimilarityScore(){ - - string cand = candidateSeq->getAligned(); - string temp = templateSeq->getAligned(); - int alignmentLength = temp.length(); - int mismatch = 0; - int denominator = 0; + try { - for(int i=0;igetAligned(); + string temp = templateSeq->getAligned(); + int alignmentLength = temp.length(); + int mismatch = 0; + int denominator = 0; + + for(int i=0;ierrorOut(e, "Nast", "getSimilarityScore"); + exit(1); + } } /**************************************************************************************************/