X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=makecontigscommand.cpp;h=0b9e0846eddb90391b68076ca947d2e34fee2421;hb=81ad9ebabe7f951eb2d1c922b1342e9d6e521c4e;hp=3474c57abec72b362dc9af4c27336473899beb96;hpb=eb71e28b7b7afd82540f4a8f0bac9429c5b9d713;p=mothur.git diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 3474c57..0b9e084 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -27,11 +27,12 @@ vector MakeContigsCommand::setParameters(){ CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); + CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); - CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); @@ -71,9 +72,10 @@ string MakeContigsCommand::getHelpString(){ helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; - helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n"; + helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; + helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n"; helpString += "The make.contigs command should be in the following format: \n"; helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n"; @@ -286,7 +288,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { m->mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } - temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "25"; } + temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; } m->mothurConvert(temp, insert); if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; } @@ -318,6 +320,9 @@ MakeContigsCommand::MakeContigsCommand(string option) { temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; } + trimOverlap = m->isTrue(temp); align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; } @@ -734,7 +739,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } - contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h); + contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, trimOverlap, h); pDataArray.push_back(tempcontig); hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]); @@ -919,6 +924,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string // if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; } int overlapStart = fSeq.getStartPos(); int seq2Start = rSeq.getStartPos(); + //bigger of the 2 starting positions is the location of the overlapping start if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1 overlapStart = seq2Start; @@ -938,12 +944,12 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string contig += seq1[i]; }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base if (thisfqualfile != "") { - if (scores2[BBaseMap[i]] < insert) { } // + if (scores2[BBaseMap[i]] <= insert) { } // else { contig += seq2[i]; } }else { contig += seq2[i]; } //with no quality info, then we keep it? }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base if (thisfqualfile != "") { - if (scores1[ABaseMap[i]] < insert) { } // + if (scores1[ABaseMap[i]] <= insert) { } // else { contig += seq1[i]; } }else { contig += seq1[i]; } //with no quality info, then we keep it? }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality @@ -968,6 +974,8 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; } } + if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } } + if(trashCode.length() == 0){ bool ignore = false;