X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=nast.cpp;fp=nast.cpp;h=0000000000000000000000000000000000000000;hb=4a877efa127e56e81a21f53cfdbbfd3bfbe8c4ff;hp=647e0e4f1e17e205c183c78615a966b7a98545cc;hpb=a6cf29fa4dac0909c7582cb1094151d34093ee76;p=mothur.git diff --git a/nast.cpp b/nast.cpp deleted file mode 100644 index 647e0e4..0000000 --- a/nast.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - * nast.cpp - * - * - * Created by Pat Schloss on 12/17/08. - * Copyright 2008 Patrick D. Schloss. All rights reserved. - * - * This is my implementation of the NAST (nearest alignment space termination) algorithm as described in: - * - * DeSantis TZ, Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, & Anderson GL. 2006. NAST: a multiple - * sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Research. 34:W394-9. - * - * To construct an object one needs to provide a method of getting a pairwise alignment (alignment) and the template - * and candidate sequence that are to be aligned to each other. - * - */ - -#include "sequence.hpp" -#include "alignment.hpp" -#include "nast.hpp" - -/**************************************************************************************************/ - -Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) { - try { - m = MothurOut::getInstance(); - maxInsertLength = 0; - - pairwiseAlignSeqs(); // This is part A in Fig. 2 of DeSantis et al. - regapSequences(); // This is parts B-F in Fig. 2 of DeSantis et al. - - } - catch(exception& e) { - m->errorOut(e, "Nast", "Nast"); - exit(1); - } -} - -/**************************************************************************************************/ - -void Nast::pairwiseAlignSeqs(){ // Here we call one of the pairwise alignment methods to align our unaligned candidate - // and template sequences - try { - alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned()); - - string candAln = alignment->getSeqAAln(); - string tempAln = alignment->getSeqBAln(); - - if(candAln == ""){ - - candidateSeq->setPairwise(""); - templateSeq->setPairwise(templateSeq->getUnaligned()); - - } - else{ - if(tempAln[0] == '-'){ - int pairwiseAlignmentLength = tempAln.length(); // we need to make sure that the candidate sequence alignment - for(int i=0;i=0; i--){// ends where the template sequence alignment ends, if it runs - if(isalpha(tempAln[i])){ // long, we nuke the end of the candidate sequence - candAln = candAln.substr(0,i+1); - tempAln = tempAln.substr(0,i+1); - break; - } - } - } - - } - - candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for - templateSeq->setPairwise(tempAln); // the candidate and template sequences - - } - catch(exception& e) { - m->errorOut(e, "Nast", "pairwiseAlignSeqs"); - exit(1); - } -} - -/**************************************************************************************************/ - -void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAlign){ - -// here we do steps C-F of Fig. 2 from DeSantis et al. - try { - - int longAlignmentLength = newTemplateAlign.length(); - - for(int i=0; i0;leftIndex--){ // then we've got problems... - if(!isalpha(candAln[leftIndex])){ - leftRoom = 1; //count how far it is to the nearest gap on the LEFT side of the anomaly - while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom])) { leftRoom++; } - break; - } - } - - for(rightIndex=i+1;rightIndex maxInsertLength){ maxInsertLength = insertLength; } - - if((leftRoom + rightRoom) >= insertLength){ - - // Parts D & E from Fig. 2 of DeSantis et al. - if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's - - if(leftRoom >= insertLength){ // enough room to the left to move - //cout << "lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl; - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr((i+insertLength)); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - //cout << "lr candAln = " << candAln.length() << '\t' << leftIndex << '\t' << endl; - string leftCandidateString = candAln.substr(0,(leftIndex-insertLength+1)); - string rightCandidateString = candAln.substr((leftIndex+1)); - candAln = leftCandidateString + rightCandidateString; - - }else{ // not enough room to the left, have to steal some space to the right - - //cout << "in else lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << insertLength << endl; - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr((i+insertLength)); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - //cout << " in else lr candAln = " << candAln.length() << '\t' << " leftIndex = " << leftIndex << " leftroom = " << leftRoom << " rightIndex = " << rightIndex << '\t' << " rightroom = " << rightRoom << '\t' << endl; - string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); - string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); - string rightCandidateString = candAln.substr((rightIndex+(insertLength-leftRoom))); - candAln = leftCandidateString + insertString + rightCandidateString; - - } - }else{ // the right gap is closer - > move stuff right there's - if(rightRoom >= insertLength){ // enough room to the right to move - //cout << "rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr((i+insertLength)); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - //cout << "rr candAln = " << candAln.length() << '\t' << i << '\t' << rightIndex << '\t' << rightIndex+insertLength << endl; - string leftCandidateString = candAln.substr(0,rightIndex); - string rightCandidateString = candAln.substr((rightIndex+insertLength)); - candAln = leftCandidateString + rightCandidateString; - - } - else{ // not enough room to the right, have to steal some - // space to the left lets move left and then right... - //cout << "in else rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr((i+insertLength)); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - //cout << "in else rr candAln = " << candAln.length() << '\t' << '\t' << (leftIndex-(insertLength-rightRoom)+1) << '\t' << (leftIndex+1,rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl; - string leftCandidateString = candAln.substr(0,(leftIndex-(insertLength-rightRoom)+1)); - string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); - string rightCandidateString = candAln.substr((rightIndex+rightRoom)); - candAln = leftCandidateString + insertString + rightCandidateString; - - } - } - - if ((i - insertLength) < 0) { i = 0; } - else { i -= insertLength; } - - } - else{ - // there could be a case where there isn't enough room in either direction to move stuff -//cout << "in else else newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << (i+leftRoom+rightRoom) << endl; - string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr((i+leftRoom+rightRoom)); - newTemplateAlign = leftTemplateString + rightTemplateString; - longAlignmentLength = newTemplateAlign.length(); - - //cout << "in else else newTemplateAlign = " << candAln.length() << '\t' << (leftIndex-leftRoom+1) << '\t' << (leftIndex+1) << '\t' << (rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl; - string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); - string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); - string rightCandidateString = candAln.substr((rightIndex+rightRoom)); - candAln = leftCandidateString + insertString + rightCandidateString; - - i -= (leftRoom + rightRoom); - } - -// i -= insertLength; - - //if i is negative, we want to remove the extra gaps to the right - if (i < 0) { m->mothurOut("i is negative"); m->mothurOutEndLine(); } - } - } - } - catch(exception& e) { - m->errorOut(e, "Nast", "removeExtraGaps"); - exit(1); - } -} - -/**************************************************************************************************/ - -void Nast::regapSequences(){ //This is essentially part B in Fig 2. of DeSantis et al. - try { - //cout << candidateSeq->getName() << endl; - string candPair = candidateSeq->getPairwise(); - string candAln = ""; - - string tempPair = templateSeq->getPairwise(); - string tempAln = templateSeq->getAligned(); // we use the template aligned sequence as our guide - - int pairwiseLength = candPair.length(); - int fullAlignLength = tempAln.length(); - - if(candPair == ""){ - for(int i=0;isetAligned(candAln); - return; - } - - int fullAlignIndex = 0; - int pairwiseAlignIndex = 0; - string newTemplateAlign = ""; // this is going to be messy so we want a temporary template - // alignment string - while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex] == '-'){ - candAln += '.'; // add the initial '-' and '.' to the candidate and template - newTemplateAlign += tempAln[fullAlignIndex];// pairwise sequences - fullAlignIndex++; - } - - string lastLoop = ""; - - while(pairwiseAlignIndexseems to be the opposite of the alpha scenario - - candAln += candPair[pairwiseAlignIndex]; - newTemplateAlign += tempAln[fullAlignIndex];// - pairwiseAlignIndex++; - fullAlignIndex++; - } - else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex]) - && !isalpha(candPair[pairwiseAlignIndex])){ - // template pairwise has a character, but its full aligned sequence and candidate sequence have gaps - // this would happen like we need to add a gap. basically the opposite of the alpha situation - - newTemplateAlign += tempAln[fullAlignIndex];// - candAln += "-"; - fullAlignIndex++; - } - else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex]) - && !isalpha(candPair[pairwiseAlignIndex])){ - // template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible - // would skip the gaps and not progress through full alignment sequence - // not tested yet - - m->mothurOut("We're into D " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); m->mothurOutEndLine(); - pairwiseAlignIndex++; - } - else{ - // everything has a gap - not possible - // not tested yet - - m->mothurOut("We're into F " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); m->mothurOutEndLine(); - pairwiseAlignIndex++; - fullAlignIndex++; - } - } - - for(int i=fullAlignIndex;i=0;i--){ // ditto. - if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } - else{ end = i; break; } - } - - for(int i=start;i<=end;i++){ // go through the candidate alignment sequence and make sure that - candAln[i] = toupper(candAln[i]); // everything is upper case - } - - - if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official - removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig. - } // 2 of Desantis et al. - - candidateSeq->setAligned(candAln); - //cout << "here" << endl; - } - catch(exception& e) { - m->errorOut(e, "Nast", "regapSequences"); - exit(1); - } -} - -/**************************************************************************************************/ - -float Nast::getSimilarityScore(){ - try { - - string cand = candidateSeq->getAligned(); - string temp = templateSeq->getAligned(); - int alignmentLength = temp.length(); - int mismatch = 0; - int denominator = 0; - - for(int i=0;ierrorOut(e, "Nast", "getSimilarityScore"); - exit(1); - } -} - -/**************************************************************************************************/ - -int Nast::getMaxInsertLength(){ - - return maxInsertLength; - -} - -/**************************************************************************************************/