From 8130a55348ef2b5f431fe47c6a83aae581e244b5 Mon Sep 17 00:00:00 2001 From: pschloss Date: Mon, 10 May 2010 11:10:37 +0000 Subject: [PATCH] pds - modified strip barcode funcrtion in trimseqscommand --- makefile | 4 +- mothur.cpp | 2 +- trimseqscommand.cpp | 123 ++++++++++++++++++++++++++++++-------------- trimseqscommand.h | 2 +- 4 files changed, 89 insertions(+), 42 deletions(-) diff --git a/makefile b/makefile index a868ec5..b421ca2 100644 --- a/makefile +++ b/makefile @@ -10,7 +10,7 @@ # Macros # -CC = mpic++ +CC = g++ CC_OPTIONS = -O3 # if you do not want to use the readline library set to no, default yes. @@ -445,7 +445,7 @@ mothur : \ ./logsd.o\ ./geom.o\ ./setlogfilecommand.o\ - -o ../Release/mothur + -o mothur clean : rm \ diff --git a/mothur.cpp b/mothur.cpp index 4f91c2e..d674ff4 100644 --- a/mothur.cpp +++ b/mothur.cpp @@ -58,7 +58,7 @@ int main(int argc, char *argv[]){ m->mothurOutEndLine(); m->mothurOutEndLine(); #else m->mothurOutJustToLog("Linux version"); - >m->mothurOutEndLine(); m->mothurOutEndLine(); + m->mothurOutEndLine(); m->mothurOutEndLine(); #endif #else diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index b12da1c..253fe7a 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -113,6 +113,8 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; } convert(temp, pdiffs); + if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } + temp = validParameter.validFile(parameters, "qfile", true); if (temp == "not found") { qFileName = ""; } else if(temp == "not open") { abort = true; } @@ -366,6 +368,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int group; string trashCode = ""; int currentSeqsDiffs = 0; + currentSeqsTdiffs = 0; if(qFileName != ""){ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } @@ -378,10 +381,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(barcodes.size() != 0){ success = stripBarcode(currSeq, group); +// cout << "here: " << success << endl; if(!success){ trashCode += 'b'; } else{ currentSeqsDiffs += currentSeqsTdiffs; } } - + if(numFPrimers != 0){ success = stripForward(currSeq); if(!success){ trashCode += 'f'; } @@ -622,42 +626,85 @@ bool TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){ } //if you found the barcode or if you don't want to allow for diffs +// cout << success; if ((bdiffs == 0) || (success == 1)) { return success; } else { //try aligning and see if you can find it - +// cout << endl; + + int maxLength = 0; + Alignment* alignment; - if (barcodes.size() > 0) { //assumes barcodes are all the same length + if (barcodes.size() > 0) { map::iterator it=barcodes.begin(); - string temp = it->first; - - alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (temp.length()+bdiffs+1)); + + for(it;it!=barcodes.end();it++){ + if(it->first.length() > maxLength){ + maxLength = it->first.length(); + } + } + alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, (maxLength+bdiffs+1)); + }else{ alignment = NULL; } - //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + int minGroup = -1; + int minPos = 0; + for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ string oligo = it->first; - int length = oligo.length(); +// int length = oligo.length(); - if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length + if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length success = 0; break; } //use needleman to align first barcode.length()+numdiffs of sequence to each barcode - alignment->align(oligo, rawSequence.substr(0,length+bdiffs)); + alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs)); oligo = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); - //cout << "barcode = " << oligo << " raw = " << rawSequence.substr(0,oligo.length()) << " raw aligned = " << temp << endl; + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ + if(oligo[i] != '-'){ alnLength = i+1; break; } + } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); +// cout << "barcode = " << oligo << " raw = " << rawSequence.substr(0,alnLength) << " raw aligned = " << temp << endl; + cout << seq.getName() << endl; + cout << temp << endl; + cout << oligo << endl; + cout << alnLength << endl; + cout << endl; int newStart=0; - if(compareDNASeq(oligo, temp, length, newStart, bdiffs)){ - group = it->second; - seq.setUnaligned(rawSequence.substr(newStart)); - success = 1; - break; + int numDiff = countDiffs(oligo, temp);//, alnLength, newStart, bdiffs); + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minGroup = it->second; + minPos = 0; + for(int i=0;i bdiffs){ success = 0; } + else if(minCount > 1) { success = 0; } + else{ + group = minGroup; + seq.setUnaligned("*" + rawSequence.substr(minPos)); + success = 1; } if (alignment != NULL) { delete alignment; } @@ -721,11 +768,11 @@ bool TrimSeqsCommand::stripForward(Sequence& seq){ string temp = alignment->getSeqBAln(); int newStart = 0; - if(compareDNASeq(oligo, temp, length, newStart, pdiffs)){ - seq.setUnaligned(rawSequence.substr(newStart)); - success = 1; - break; - } +// if(compareDNASeq(oligo, temp, length, newStart, pdiffs)){ +// seq.setUnaligned(rawSequence.substr(newStart)); +// success = 1; +// break; +// } } if (alignment != NULL) { delete alignment; } @@ -870,21 +917,22 @@ bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){ } //*************************************************************************************************************** -bool TrimSeqsCommand::compareDNASeq(string oligo, string seq, int numBases, int& end, int diffs){ +int TrimSeqsCommand::countDiffs(string oligo, string seq){//, int numBases, int& end, int diffs){ try { - bool success = 1; +// bool success = 1; int length = oligo.length(); - end = numBases; - int countBases = 0; +// end = numBases; +// int countBases = 0; int countDiffs = 0; - if (length != 0) { - if ((oligo[0] == '-') || (oligo[0] == '.')) { success = 0; return success; } //no gaps allowed at beginning - } + +// if (length != 0) { +// if ((oligo[0] == '-') || (oligo[0] == '.')) { return 1e6; } //no gaps allowed at beginning +// } for(int i=0;i diffs) { success = 0; break; } - } - else{ - success = 1; +// if(countDiffs > diffs) { break; } } +// else{ +// success = 1; +// } - if (countBases >= numBases) { end = countBases; break; } //stop checking after end of barcode or primer +// if (countBases >= numBases) { end = countBases; break; } //stop checking after end of barcode or primer } - //if it's a success we want to check for total diffs in driver, so save it. - if (success == 1) { currentSeqsTdiffs = countDiffs; } +// if (success == 1) { currentSeqsTdiffs = countDiffs; } - return success; + return countDiffs; } catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "compareDNASeq"); + m->errorOut(e, "TrimSeqsCommand", "countDiffs"); exit(1); } diff --git a/trimseqscommand.h b/trimseqscommand.h index f9a251c..f944e7f 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -39,7 +39,7 @@ private: bool cullHomoP(Sequence&); bool cullAmbigs(Sequence&); bool compareDNASeq(string, string); - bool compareDNASeq(string, string, int, int&, int); + int countDiffs(string, string);//, int, int&, int); bool abort; string fastaFile, oligoFile, qFileName, outputDir; -- 2.39.2