X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimseqscommand.cpp;h=951ce65d400bb36afc60df92e56e148c14ee271c;hp=935b9a9297e08d1f50e89d4b9a059cf9cbe8e54a;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=e9845ee4c8db2e044e87d721cc2d94f8d609e03d diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 935b9a9..951ce65 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -24,7 +24,7 @@ vector TrimSeqsCommand::setParameters(){ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop); - CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength); + CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength); CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); @@ -34,6 +34,7 @@ vector TrimSeqsCommand::setParameters(){ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward); + CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform); CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim); CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold); CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage); @@ -61,7 +62,7 @@ string TrimSeqsCommand::getHelpString(){ string helpString = ""; helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n"; helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n"; - helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n"; + helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n"; helpString += "The fasta parameter is required.\n"; helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"; helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n"; @@ -84,6 +85,7 @@ string TrimSeqsCommand::getHelpString(){ helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n"; helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n"; helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n"; + helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n"; helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n"; @@ -256,7 +258,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; } m->mothurConvert(temp, maxHomoP); - temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; } + temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "1"; } m->mothurConvert(temp, minLength); temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; } @@ -329,6 +331,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; } keepforward = m->isTrue(temp); + temp = validParameter.validFile(parameters, "logtransform", false); if (temp == "not found") { temp = "F"; } + logtransform = m->isTrue(temp); + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } reorient = m->isTrue(temp); @@ -422,7 +427,7 @@ int TrimSeqsCommand::execute(){ if (countfile != "") { CountTable ct; - ct.readTable(countfile); + ct.readTable(countfile, true, false); nameCount = ct.getNameMap(); outputNames.push_back(trimCountFile); outputNames.push_back(scrapCountFile); @@ -443,7 +448,7 @@ int TrimSeqsCommand::execute(){ outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } - + if (m->control_pressed) { return 0; } //fills lines and qlines @@ -538,7 +543,7 @@ int TrimSeqsCommand::execute(){ if (countfile != "") { //create countfile with group info included CountTable* ct = new CountTable(); - ct->readTable(trimCountFile); + ct->readTable(trimCountFile, true, false); map justTrimmedNames = ct->getNameMap(); delete ct; @@ -693,7 +698,21 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string rpairedBarcodes[it->first] = tempPair; //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl; } - rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); + int index = rpairedBarcodes.size(); + for (map::iterator it = barcodes.begin(); it != barcodes.end(); it++) { + oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode + rpairedBarcodes[index] = tempPair; index++; + //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl; + } + + index = rpairedPrimers.size(); + for (map::iterator it = primers.begin(); it != primers.end(); it++) { + oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode + rpairedPrimers[index] = tempPair; index++; + //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl; + } + + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); } while (moreSeqs) { @@ -713,7 +732,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); m->gobble(inFASTA); - //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl; + //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); QualityScores currQual; QualityScores savedQual; @@ -738,10 +757,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(numBarcodes != 0){ success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex); - if(success > bdiffs) { trashCode += 'b'; } + if(success > bdiffs) { + trashCode += 'b'; + } else{ currentSeqsDiffs += success; } } - + //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq, currQual); if(success > sdiffs) { trashCode += 's'; } @@ -752,9 +773,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(numFPrimers != 0){ success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward); if(success > pdiffs) { - //if (pairedOligos) { trashCode += trimOligos->getTrashCode(); } - //else { trashCode += 'f'; } - trashCode += 'f'; + trashCode += 'f'; } else{ currentSeqsDiffs += success; } } @@ -773,20 +792,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int thisBarcodeIndex = 0; int thisPrimerIndex = 0; - + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; if(numBarcodes != 0){ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); - if(thisSuccess > bdiffs) { thisTrashCode += 'b'; } + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } - + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; if(numFPrimers != 0){ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward); - if(thisSuccess > pdiffs) { - //if (pairedOligos) { thisTrashCode += rtrimOligos->getTrashCode(); } - //else { thisTrashCode += 'f'; } - thisTrashCode += 'f'; - } + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } @@ -804,7 +819,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string savedQual.flipQScores(); currQual.setScores(savedQual.getScores()); } - } + }else { trashCode += "(" + thisTrashCode + ")"; } } if(keepFirst != 0){ @@ -821,9 +836,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int origLength = currSeq.getNumBases(); if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } - else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } - else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } - else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } + else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage, logtransform); } + else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform); } + else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform); } else { success = 1; } //you don't want to trim, if it fails above then scrap it @@ -1176,7 +1191,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos, primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast, - qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, + qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount); pDataArray.push_back(tempTrim); @@ -1393,9 +1408,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { string sname = ""; nameStream >> sname; sname = sname.substr(1); - for (int i = 0; i < sname.length(); i++) { - if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; } - } + m->checkName(sname); map::iterator it = firstSeqNames.find(sname); @@ -1535,7 +1548,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< // get rest of line in case there is a primer name while (!inOligos.eof()) { char c = inOligos.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1653,7 +1666,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; } - + //add in potential combos if(barcodeNameVector.size() == 0){ barcodes[""] = 0; @@ -1854,7 +1867,13 @@ bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){ if(qscores.getName() != ""){ qscores.trimQScores(-1, keepFirst); } + +// sequence.printSequence(cout);cout << endl; + sequence.trim(keepFirst); + +// sequence.printSequence(cout);cout << endl << endl;; + return success; } catch(exception& e) {