From e339f9008daa7d37c9a9034829565620a6abe783 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 18 May 2012 12:35:51 -0400 Subject: [PATCH] added check to cluster.classic to make sure file type is phylip. added mapping function to alignments traceback function so we can relate the position in the unaligned sequence to the aligned sequence. worked on make.contigs command --- alignment.cpp | 37 +++++- alignment.hpp | 4 + clusterclassic.cpp | 7 +- commandfactory.cpp | 5 + makecontigscommand.cpp | 251 ++++++++++++++++++++++++++++++++++++++++- makecontigscommand.h | 166 ++++++++++++++++++++++++++- 6 files changed, 456 insertions(+), 14 deletions(-) diff --git a/alignment.cpp b/alignment.cpp index 03bf9ba..0dec250 100644 --- a/alignment.cpp +++ b/alignment.cpp @@ -53,7 +53,8 @@ void Alignment::resize(int A) { void Alignment::traceBack(){ // This traceback routine is used by the dynamic programming algorithms try { - // to fill the values of seqAaln and seqBaln + BBaseMap.clear(); + ABaseMap.clear(); // to fill the values of seqAaln and seqBaln seqAaln = ""; seqBaln = ""; int row = lB-1; @@ -65,31 +66,50 @@ void Alignment::traceBack(){ // This traceback routine is used by the dynamic // matrix if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom- - else{ // right corner bail out because it means nothing got aligned + else{ // right corner bail out because it means nothing got aligned + int count = 0; while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going... if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the seqAaln = '-' + seqAaln; // matrix. this indicates that we need to insert a gap in seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB + BBaseMap[row] = count; currentCell = alignment[--row][column]; } else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA + ABaseMap[column] = count; currentCell = alignment[row][--column]; } else{ seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left, seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments + BBaseMap[row] = count; + ABaseMap[column] = count; currentCell = alignment[--row][--column]; } + count++; } } - pairwiseLength = seqAaln.length(); + + pairwiseLength = seqAaln.length(); seqAstart = 1; seqAend = 0; seqBstart = 1; seqBend = 0; - + //flip maps since we now know the total length + map newAMap; + for (map::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) { + int spot = it->second; + newAMap[pairwiseLength-spot-1] = it->first-1; + } + ABaseMap = newAMap; + map newBMap; + for (map::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) { + int spot = it->second; + newBMap[pairwiseLength-spot-1] = it->first-1; + } + BBaseMap = newBMap; for(int i=0;i Alignment::getSeqAAlnBaseMap(){ + return ABaseMap; +} +/**************************************************************************************************/ +map Alignment::getSeqBAlnBaseMap(){ + return BBaseMap; +} /**************************************************************************************************/ int Alignment::getTemplateEndPos(){ diff --git a/alignment.hpp b/alignment.hpp index 7a943fd..9d2197f 100644 --- a/alignment.hpp +++ b/alignment.hpp @@ -30,6 +30,8 @@ public: // float getAlignmentScore(); string getSeqAAln(); string getSeqBAln(); + map getSeqAAlnBaseMap(); + map getSeqBAlnBaseMap(); int getCandidateStartPos(); int getCandidateEndPos(); int getTemplateStartPos(); @@ -49,6 +51,8 @@ protected: int pairwiseLength; int nRows, nCols, lA, lB; vector > alignment; + map ABaseMap; + map BBaseMap; MothurOut* m; }; diff --git a/clusterclassic.cpp b/clusterclassic.cpp index f0bae59..2d1b9a6 100644 --- a/clusterclassic.cpp +++ b/clusterclassic.cpp @@ -40,7 +40,12 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { ifstream fileHandle; m->openInputFile(filename, fileHandle); - fileHandle >> nseqs >> name; + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + matrixNames.push_back(name); diff --git a/commandfactory.cpp b/commandfactory.cpp index e73d60b..1241a04 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -133,6 +133,7 @@ #include "makebiomcommand.h" #include "getcoremicrobiomecommand.h" #include "listotulabelscommand.h" +#include "makecontigscommand.h" /*******************************************************/ @@ -287,6 +288,7 @@ CommandFactory::CommandFactory(){ commands["make.biom"] = "make.biom"; commands["get.coremicrobiome"] = "get.coremicrobiome"; commands["list.otulabels"] = "list.otulabels"; + commands["make.contigs"] = "make.contigs"; commands["quit"] = "MPIEnabled"; } @@ -499,6 +501,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "make.biom") { command = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { command = new GetCoreMicroBiomeCommand(optionString); } else if(commandName == "list.otulabels") { command = new ListOtuLabelsCommand(optionString); } + else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -652,6 +655,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "make.biom") { pipecommand = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { pipecommand = new GetCoreMicroBiomeCommand(optionString); } else if(commandName == "list.otulabels") { pipecommand = new ListOtuLabelsCommand(optionString); } + else if(commandName == "make.contigs") { pipecommand = new MakeContigsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -791,6 +795,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "make.biom") { shellcommand = new MakeBiomCommand(); } else if(commandName == "get.coremicrobiome") { shellcommand = new GetCoreMicroBiomeCommand(); } else if(commandName == "list.otulabels") { shellcommand = new ListOtuLabelsCommand(); } + else if(commandName == "make.contigs") { shellcommand = new MakeContigsCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 697521c..560b39c 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -13,7 +13,7 @@ vector MakeContigsCommand::setParameters(){ try { CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta); - CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign); + CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen); @@ -63,6 +63,7 @@ MakeContigsCommand::MakeContigsCommand(){ vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["qfile"] = tempOutNames; + outputTypes["mismatch"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); @@ -96,6 +97,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["qfile"] = tempOutNames; + outputTypes["mismatch"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -155,7 +157,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { m->mothurConvert(temp, processors); align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } - if ((align != "needleman") && (align != "blast") && (align != "gotoh") && (align != "noalign")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman, blast, gotoh and noalign. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; } + if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; } } } @@ -173,12 +175,31 @@ int MakeContigsCommand::execute(){ //this function will create a forward and reverse, fasta and qual files for each processor. //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual int numReads = 0; - m->mothurOut("Reading fastq data..."); cout.flush(); + int start = time(NULL); + longestBase = 1000; + m->mothurOut("Reading fastq data...\n"); vector< vector > files = readFastqFiles(numReads); m->mothurOut("Done.\n"); + + if (m->control_pressed) { return 0; } + string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta"; + string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual"; + string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches"; + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile); + outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile); - + m->mothurOut("Making contigs...\n"); + createProcesses(files, outFastaFile, outQualFile, outMisMatchFile); + m->mothurOut("Done.\n"); + + //remove temp fasta and qual files + for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n"); string currentFasta = ""; itTypes = outputTypes.find("fasta"); @@ -207,6 +228,225 @@ int MakeContigsCommand::execute(){ } } //********************************************************************************************************************** +int MakeContigsCommand::createProcesses(vector< vector > files, string outputFasta, string outputQual, string outputMisMatches) { + try { + int num = 0; + vector processIDS; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + int process = 0; + + //loop through and create all the processes you want + while (process != processors-1) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp"); + + //pass numSeqs to parent + ofstream out; + string tempFile = outputFasta + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //do my part + num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches); + + //force parent to wait until all the processes are done + for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } + in.close(); m->mothurRemove(tempFile); + } + #else + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the contigsData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=0; icount; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + + #endif + + for (int i = 0; i < processIDS.size(); i++) { + m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta); + m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp")); + + m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual); + m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp")); + + m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches); + m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp")); + } + + return num; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "createProcesses"); + exit(1); + } +} +//********************************************************************************************************************** +int MakeContigsCommand::driver(vector files, string outputFasta, string outputQual, string outputMisMatches){ + try { + + Alignment* alignment; + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + + int num = 0; + string thisffastafile = files[0]; + string thisfqualfile = files[1]; + string thisrfastafile = files[2]; + string thisrqualfile = files[3]; + + if (m->debug) { m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); } + + ifstream inFFasta, inRFasta, inFQual, inRQual; + m->openInputFile(thisffastafile, inFFasta); + m->openInputFile(thisfqualfile, inFQual); + m->openInputFile(thisrfastafile, inRFasta); + m->openInputFile(thisrqualfile, inRQual); + + ofstream outFasta, outQual, outMisMatch; + m->openOutputFile(outputFasta, outFasta); + m->openOutputFile(outputQual, outQual); + m->openOutputFile(outputMisMatches, outMisMatch); + outMisMatch << "Name\tLength\tMisMatches\n"; + + while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) { + + if (m->control_pressed) { break; } + + //read seqs and quality info + Sequence fSeq(inFFasta); m->gobble(inFFasta); + Sequence rSeq(inRFasta); m->gobble(inRFasta); + QualityScores fQual(inFQual); m->gobble(inFQual); + QualityScores rQual(inRQual); m->gobble(inRQual); + + //flip the reverse reads + rSeq.reverseComplement(); + rQual.flipQScores(); + + //pairwise align + alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned()); + map ABaseMap = alignment->getSeqAAlnBaseMap(); + map BBaseMap = alignment->getSeqBAlnBaseMap(); + fSeq.setAligned(alignment->getSeqAAln()); + rSeq.setAligned(alignment->getSeqBAln()); + int length = fSeq.getAligned().length(); + + //traverse alignments merging into one contiguous seq + string contig = ""; + vector contigScores; + int numMismatches = 0; + string seq1 = fSeq.getAligned(); + string seq2 = rSeq.getAligned(); + + vector scores1 = fQual.getQualityScores(); + vector scores2 = rQual.getQualityScores(); + + for (int i = 0; i < length; i++) { + if (seq1[i] == seq2[i]) { //match, add base and choose highest score + contig += seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; } + }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2 + contig += seq2[i]; + contigScores.push_back(scores2[BBaseMap[i]]); + }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1 + contig += seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality + char c = seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; } + contig += c; + numMismatches++; + }else { //should never get here + m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n"); + } + } + + //output + outFasta << ">" << fSeq.getName() << endl << contig << endl; + outQual << ">" << fSeq.getName() << endl; + for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; } + outQual << endl; + outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl; + + num++; + + //report progress + if((num) % 1000 == 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); } + } + + //report progress + if((num) % 1000 != 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); } + + inFFasta.close(); + inFQual.close(); + inRFasta.close(); + inRQual.close(); + outFasta.close(); + outQual.close(); + outMisMatch.close(); + delete alignment; + + if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta); m->mothurRemove(outputMisMatches);} + + return num; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "driver"); + exit(1); + } +} +//********************************************************************************************************************** vector< vector > MakeContigsCommand::readFastqFiles(int& count){ try { vector< vector > files; @@ -287,7 +527,7 @@ vector< vector > MakeContigsCommand::readFastqFiles(int& count){ //report progress if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } - + //close files, delete ofstreams for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } @@ -308,7 +548,6 @@ vector< vector > MakeContigsCommand::readFastqFiles(int& count){ exit(1); } } - //********************************************************************************************************************** fastqRead MakeContigsCommand::readFastq(ifstream& in){ try { diff --git a/makecontigscommand.h b/makecontigscommand.h index e63695e..779d35c 100644 --- a/makecontigscommand.h +++ b/makecontigscommand.h @@ -10,6 +10,14 @@ // #include "command.hpp" +#include "sequence.hpp" +#include "qualityscores.h" +#include "alignment.hpp" +#include "gotohoverlap.hpp" +#include "needlemanoverlap.hpp" +#include "blastalign.hpp" +#include "noalign.hpp" + struct fastqRead { vector scores; @@ -50,14 +58,166 @@ private: fastqRead readFastq(ifstream&); vector< vector > readFastqFiles(int&); bool checkReads(fastqRead&, fastqRead&); + int createProcesses(vector< vector >, string, string, string); + int driver(vector, string, string, string); }; /**************************************************************************************************/ +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct contigsData { + string outputFasta; + string outputQual; + string outputMisMatches; + string align; + vector files; + MothurOut* m; + float match, misMatch, gapOpen, gapExtend; + int count, threadID; + + contigsData(){} + contigsData(vector f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int tid) { + files = f; + outputFasta = of; + outputQual = oq; + outputMisMatches = om; + m = mout; + match = ma; + misMatch = misMa; + gapOpen = gapO; + gapExtend = gapE; + align = al; + count = 0; + threadID = tid; + } +}; - - - +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ + contigsData* pDataArray; + pDataArray = (contigsData*)lpParam; + + try { + int longestBase = 1000; + Alignment* alignment; + if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase); } + else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase); } + + int num = 0; + string thisffastafile = pDataArray->files[0]; + string thisfqualfile = pDataArray->files[1]; + string thisrfastafile = pDataArray->files[2]; + string thisrqualfile = pDataArray->files[3]; + + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); } + + ifstream inFFasta, inRFasta, inFQual, inRQual; + pDataArray->m->openInputFile(thisffastafile, inFFasta); + pDataArray->m->openInputFile(thisfqualfile, inFQual); + pDataArray->m->openInputFile(thisrfastafile, inRFasta); + pDataArray->m->openInputFile(thisrqualfile, inRQual); + + ofstream outFasta, outQual, outMisMatch; + pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta); + pDataArray->m->openOutputFile(pDataArray->outputQual, outQual); + pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch); + outMisMatch << "Name\tLength\tMisMatches\n"; + + while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) { + + if (pDataArray->m->control_pressed) { break; } + + //read seqs and quality info + Sequence fSeq(inFFasta); pDataArray->m->gobble(inFFasta); + Sequence rSeq(inRFasta); pDataArray->m->gobble(inRFasta); + QualityScores fQual(inFQual); pDataArray->m->gobble(inFQual); + QualityScores rQual(inRQual); pDataArray->m->gobble(inRQual); + + //flip the reverse reads + rSeq.reverseComplement(); + rQual.flipQScores(); + + //pairwise align + alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned()); + map ABaseMap = alignment->getSeqAAlnBaseMap(); + map BBaseMap = alignment->getSeqBAlnBaseMap(); + fSeq.setAligned(alignment->getSeqAAln()); + rSeq.setAligned(alignment->getSeqBAln()); + int length = fSeq.getAligned().length(); + + //traverse alignments merging into one contiguous seq + string contig = ""; + vector contigScores; + int numMismatches = 0; + string seq1 = fSeq.getAligned(); + string seq2 = rSeq.getAligned(); + + vector scores1 = fQual.getQualityScores(); + vector scores2 = rQual.getQualityScores(); + + for (int i = 0; i < length; i++) { + if (seq1[i] == seq2[i]) { //match, add base and choose highest score + contig += seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; } + }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2 + contig += seq2[i]; + contigScores.push_back(scores2[BBaseMap[i]]); + }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1 + contig += seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality + char c = seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; } + contig += c; + numMismatches++; + }else { //should never get here + pDataArray->m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n"); + } + } + + //output + outFasta << ">" << fSeq.getName() << endl << contig << endl; + outQual << ">" << fSeq.getName() << endl; + for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; } + outQual << endl; + outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl; + + num++; + + //report progress + if((num) % 1000 == 0){ pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine(); } + } + + //report progress + if((num) % 1000 != 0){ pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine(); } + + inFFasta.close(); + inFQual.close(); + inRFasta.close(); + inRQual.close(); + outFasta.close(); + outQual.close(); + outMisMatch.close(); + delete alignment; + + if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta); pDataArray->m->mothurRemove(pDataArray->outputMisMatches);} + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "AlignCommand", "MyContigsThreadFunction"); + exit(1); + } +} +#endif #endif -- 2.39.2