From: Sarah Westcott Date: Fri, 18 May 2012 19:16:09 +0000 (-0400) Subject: added threshold parameter to make.contigs command. X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=e7ae6e6b27c45b5691c19f423ec56faae8e2f255 added threshold parameter to make.contigs command. --- diff --git a/alignment.cpp b/alignment.cpp index 0dec250..9ab1700 100644 --- a/alignment.cpp +++ b/alignment.cpp @@ -110,6 +110,7 @@ void Alignment::traceBack(){ // This traceback routine is used by the dynamic newBMap[pairwiseLength-spot-1] = it->first-1; } BBaseMap = newBMap; + for(int i=0;i& thisLookUp m->mothurOut("zscore: " + toString(zscore)); m->mothurOutEndLine(); m->mothurOut("standard deviation: " + toString(sd)); m->mothurOutEndLine(); - out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << zscore '\t' << sd << endl; + out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << zscore << '\t' << sd << endl; return 0; } diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 560b39c..f672840 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -18,6 +18,7 @@ vector MakeContigsCommand::setParameters(){ CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend); + CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -43,6 +44,7 @@ string MakeContigsCommand::getHelpString(){ helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; + helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The make.contigs command should be in the following format: \n"; helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; @@ -152,6 +154,10 @@ MakeContigsCommand::MakeContigsCommand(string option) { m->mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } + temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "40"; } + m->mothurConvert(temp, threshold); + if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); m->mothurConvert(temp, processors); @@ -291,7 +297,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o for( int i=0; i files, string outputFasta, string vector scores1 = fQual.getQualityScores(); vector scores2 = rQual.getQualityScores(); - for (int i = 0; i < length; i++) { if (seq1[i] == seq2[i]) { //match, add base and choose highest score contig += seq1[i]; contigScores.push_back(scores1[ABaseMap[i]]); - if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; } - }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2 - contig += seq2[i]; - contigScores.push_back(scores2[BBaseMap[i]]); - }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1 - contig += seq1[i]; - contigScores.push_back(scores1[ABaseMap[i]]); + if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; } + }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base + if (scores2[BBaseMap[i]] < threshold) { } // + else { + contig += seq2[i]; + contigScores.push_back(scores2[BBaseMap[i]]); + } + }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base + if (scores1[ABaseMap[i]] < threshold) { } // + else { + contig += seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + } }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality char c = seq1[i]; contigScores.push_back(scores1[ABaseMap[i]]); - if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; } + if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; } contig += c; numMismatches++; }else { //should never get here diff --git a/makecontigscommand.h b/makecontigscommand.h index 779d35c..cc08842 100644 --- a/makecontigscommand.h +++ b/makecontigscommand.h @@ -52,7 +52,7 @@ private: bool abort; string outputDir, ffastqfile, rfastqfile, align; float match, misMatch, gapOpen, gapExtend; - int processors, longestBase; + int processors, longestBase, threshold; vector outputNames; fastqRead readFastq(ifstream&); @@ -76,10 +76,10 @@ struct contigsData { vector files; MothurOut* m; float match, misMatch, gapOpen, gapExtend; - int count, threadID; + int count, threshold, threadID; contigsData(){} - contigsData(vector f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int tid) { + contigsData(vector f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int tid) { files = f; outputFasta = of; outputQual = oq; @@ -89,6 +89,7 @@ struct contigsData { misMatch = misMa; gapOpen = gapO; gapExtend = gapE; + threshold = thr; align = al; count = 0; threadID = tid; @@ -165,12 +166,16 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ contig += seq1[i]; contigScores.push_back(scores1[ABaseMap[i]]); if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; } - }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2 - contig += seq2[i]; - contigScores.push_back(scores2[BBaseMap[i]]); - }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1 - contig += seq1[i]; - contigScores.push_back(scores1[ABaseMap[i]]); + }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base + if (scores2[BBaseMap[i]] >= pDataArray->threshold)) { + contig += seq2[i]; + contigScores.push_back(scores2[BBaseMap[i]]); + } + }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base + if (scores1[ABaseMap[i]] >= pDataArray->threshold) { + contig += seq1[i]; + contigScores.push_back(scores1[ABaseMap[i]]); + } }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality char c = seq1[i]; contigScores.push_back(scores1[ABaseMap[i]]);