X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=blastalign.cpp;fp=blastalign.cpp;h=bb7192ad498c405fc476ae722db2478fa1b407ab;hb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;hp=0000000000000000000000000000000000000000;hpb=1b73ff67c83892a025e597dabd9df6fe7b58206a;p=mothur.git diff --git a/blastalign.cpp b/blastalign.cpp new file mode 100644 index 0000000..bb7192a --- /dev/null +++ b/blastalign.cpp @@ -0,0 +1,159 @@ +/* + * blastalign.cpp + * + * + * Created by Pat Schloss on 12/16/08. + * Copyright 2008 Patrick D. Schloss. All rights reserved. + * + * This is a basic alignment method that gets the blast program to do the heavy lifting. In the future, we should + * probably incorporate NCBI's library so that we don't have to call on a user-supplied executable. This is a child + * of the Alignment class, which requires a constructor and align method. + * + */ + + +#include "alignment.hpp" +#include "blastalign.hpp" + + +//**************************************************************************************************/ + +BlastAlignment::BlastAlignment(float go, float ge, float ma, float mm) : + match(ma), // This is the score to award for two nucleotides matching (match >= 0) + mismatch(mm) // This is the penalty to assess for a mismatch (mismatch <= 0) +{ + path = m->argv; + path = path.substr(0, (path.find_last_of('m'))); + + gapOpen = abs(go); // This is the penalty to assess for opening a gap (gapOpen >= 0) + gapExtend = abs(ge); // This is the penalty to assess for extending a gap (gapExtend >= 0) + + int randNumber = rand(); + candidateFileName = toString(randNumber) + ".candidate"; + templateFileName = toString(randNumber) + ".template"; + blastFileName = toString(randNumber) + ".pairwise"; +} + +//**************************************************************************************************/ + +BlastAlignment::~BlastAlignment(){ // The desctructor should clean up by removing the temporary + m->mothurRemove(candidateFileName); // files used to run bl2seq + m->mothurRemove(templateFileName); + m->mothurRemove(blastFileName); +} + +//**************************************************************************************************/ + +void BlastAlignment::align(string seqA, string seqB){ //Use blastn to align the two sequences + + ofstream candidateFile(candidateFileName.c_str()); // Write the sequence to be aligned to a temporary candidate seq file + candidateFile << ">candidate" << endl << seqA << endl; + candidateFile.close(); + + ofstream templateFile(templateFileName.c_str()); // Write the unaligned template sequence to a temporary candidate seq file + templateFile << ">template" << endl << seqB << endl; + templateFile.close(); + + // The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and + // that we don't want to apply any kind of complexity filtering (-F F) + string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11"; + blastCommand += " -r " + toString(match) + " -q " + toString(mismatch); + blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend); + + system(blastCommand.c_str()); // Here we assume that "bl2seq" is in the users path or in the same folder as + // this executable + setPairwiseSeqs(); +} + +/**************************************************************************************************/ + +void BlastAlignment::setPairwiseSeqs(){ // This method call assigns the blast generated alignment + // to the pairwise entry in the Sequence class for the + // candidate and template Sequence objects + ifstream blastFile; + m->openInputFile(blastFileName, blastFile); + + seqAaln = ""; + seqBaln = ""; + + int candidateLength, templateLength; + char d; + + string candidateName, templateName; + + while((d=blastFile.get()) != '='){} + blastFile >> candidateName; // Get the candidate sequence name from flatfile + + while((d=blastFile.get()) != '('){} + blastFile >> candidateLength; // Get the candidate sequence length from flatfile + + while((d=blastFile.get())){ + if(d == '>'){ + blastFile >> templateName; // Get the template sequence name from flatfile + break; + } + else if(d == '*'){ // We go here if there is no significant match + + seqAstart = 0; + seqBstart = 0; + seqAend = 0; + seqBend = 0; + pairwiseLength = 0; + +// string dummy; +// while(dummy != "query:"){ m->mothurOut(dummy, ""); m->mothurOutEndLine(); blastFile >> dummy; } +// blastFile >> seqBend; +// m->mothurOut(toString(seqBend), ""); m->mothurOutEndLine(); +// for(int i=0;i> templateLength; // Get the template sequence length from flatfile + + while((d=blastFile.get()) != 'Q'){} // Suck up everything else until we get to the start of the alignment + int queryStart, sbjctStart, queryEnd, sbjctEnd; + string queryLabel, sbjctLabel, query, sbjct; + + blastFile >> queryLabel; queryLabel = 'Q' + queryLabel; + + + while(queryLabel == "Query:"){ + blastFile >> queryStart >> query >> queryEnd; + + while((d=blastFile.get()) != 'S'){}; + + blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd; + + if(seqAaln == ""){ + seqAstart = queryStart; + seqBstart = sbjctStart; + } + + seqAaln += query; // concatenate each line of the sequence to what we already have + seqBaln += sbjct; // for the query and template (subject) sequence + + blastFile >> queryLabel; + } + seqAend = queryEnd; + seqBend = sbjctEnd; + pairwiseLength = seqAaln.length(); + + for(int i=1;i