X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=5ca70a99cb4b600780b425ff7f29b909794cc362;hb=ee1d3ba3f314f05ea50c07c881fa41ce257da77a;hp=937b130deb2c4b2dd6d3e72d191c090b34ee51e5;hpb=6e63c5ff52bd2830b689417df8ba3db831e63a96;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 937b130..5ca70a9 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -21,7 +21,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles"}; + string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles", "qtrim"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -72,6 +72,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; } convert(temp, qThreshold); + + temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; } + qtrim = isTrue(temp); temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; } convert(temp, qAverage); @@ -104,7 +107,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ void TrimSeqsCommand::help(){ try { mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); - mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength.\n"); + mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, qtrim and allfiles.\n"); mothurOut("The fasta parameter is required.\n"); mothurOut("The flip parameter .... The default is 0.\n"); mothurOut("The oligos parameter .... The default is "".\n"); @@ -112,11 +115,17 @@ void TrimSeqsCommand::help(){ mothurOut("The maxhomop parameter .... The default is 0.\n"); mothurOut("The minlength parameter .... The default is 0.\n"); mothurOut("The maxlength parameter .... The default is 0.\n"); + mothurOut("The qfile parameter .....\n"); + mothurOut("The qthreshold parameter .... The default is 0.\n"); + mothurOut("The qaverage parameter .... The default is 0.\n"); + mothurOut("The allfiles parameter .... The default is F.\n"); + mothurOut("The qtrim parameter .... The default is F.\n"); mothurOut("The trim.seqs command should be in the following format: \n"); mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n"); mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n"); mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n"); - mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"); + mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n"); } catch(exception& e) { @@ -160,7 +169,7 @@ int TrimSeqsCommand::execute(){ if(qFileName != "") { openInputFile(qFileName, qFile); } bool success; - + while(!inFASTA.eof()){ Sequence currSeq(inFASTA); string origSeq = currSeq.getUnaligned(); @@ -170,10 +179,13 @@ int TrimSeqsCommand::execute(){ if(qFileName != ""){ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); } + if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { + success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap + } if(!success) { trashCode += 'q'; } - qFile.close(); } if(barcodes.size() != 0){ + success = stripBarcode(currSeq, group); if(!success){ trashCode += 'b'; } } @@ -187,7 +199,8 @@ int TrimSeqsCommand::execute(){ } if(minLength > 0 || maxLength > 0){ success = cullLength(currSeq); - if(!success){ trashCode += 'l'; } + if ((currSeq.getUnaligned().length() > 300) && (success)) { cout << "too long " << currSeq.getUnaligned().length() << endl; } + if(!success){ trashCode += 'l'; } } if(maxHomoP > 0){ success = cullHomoP(currSeq); @@ -201,6 +214,7 @@ int TrimSeqsCommand::execute(){ if(flip){ currSeq.reverseComplement(); } // should go last if(trashCode.length() == 0){ + currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed. currSeq.printSequence(outFASTA); if(barcodes.size() != 0){ outGroups << currSeq.getName() << '\t' << groupVector[group] << endl; @@ -221,6 +235,7 @@ int TrimSeqsCommand::execute(){ outFASTA.close(); scrapFASTA.close(); outGroups.close(); + if(qFileName != "") { qFile.close(); } for(int i=0;iclose(); @@ -548,7 +563,8 @@ bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ string name; qFile >> name; - if (name.length() != 0) { if(name.substr(1) != seq.getName()) { mothurOut("sequence name mismatch btwn fasta and qual file"); mothurOutEndLine(); } } + if (name[0] == '>') { if(name.substr(1) != seq.getName()) { mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); mothurOutEndLine(); } } + while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } } float score; @@ -559,7 +575,7 @@ bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ average += score; } average /= seqLength; - + if(average >= qAverage) { success = 1; } else { success = 0; } @@ -572,5 +588,3 @@ bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){ } //*************************************************************************************************************** - -