else {
//valid parameters for this command
- string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles"};
+ string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles", "qtrim"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
convert(temp, qThreshold);
+
+ temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; }
+ qtrim = isTrue(temp);
temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
convert(temp, qAverage);
void TrimSeqsCommand::help(){
try {
mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
- mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength.\n");
+ mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, qtrim and allfiles.\n");
mothurOut("The fasta parameter is required.\n");
mothurOut("The flip parameter .... The default is 0.\n");
mothurOut("The oligos parameter .... The default is "".\n");
mothurOut("The maxhomop parameter .... The default is 0.\n");
mothurOut("The minlength parameter .... The default is 0.\n");
mothurOut("The maxlength parameter .... The default is 0.\n");
+ mothurOut("The qfile parameter .....\n");
+ mothurOut("The qthreshold parameter .... The default is 0.\n");
+ mothurOut("The qaverage parameter .... The default is 0.\n");
+ mothurOut("The allfiles parameter .... The default is F.\n");
+ mothurOut("The qtrim parameter .... The default is F.\n");
mothurOut("The trim.seqs command should be in the following format: \n");
mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
- mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+ mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
+ mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
}
catch(exception& e) {
if(qFileName != "") { openInputFile(qFileName, qFile); }
bool success;
-
+
while(!inFASTA.eof()){
Sequence currSeq(inFASTA);
string origSeq = currSeq.getUnaligned();
- int group;
- string trashCode = "";
-
- if(qFileName != ""){
- if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
- else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
- if(!success) { trashCode += 'q'; }
- qFile.close();
- }
- if(barcodes.size() != 0){
- success = stripBarcode(currSeq, group);
- if(!success){ trashCode += 'b'; }
- }
- if(numFPrimers != 0){
- success = stripForward(currSeq);
- if(!success){ trashCode += 'f'; }
- }
- if(numRPrimers != 0){
- success = stripReverse(currSeq);
- if(!success){ trashCode += 'r'; }
- }
- if(minLength > 0 || maxLength > 0){
- success = cullLength(currSeq);
- if(!success){ trashCode += 'l'; }
- }
- if(maxHomoP > 0){
- success = cullHomoP(currSeq);
- if(!success){ trashCode += 'h'; }
- }
- if(maxAmbig != -1){
- success = cullAmbigs(currSeq);
- if(!success){ trashCode += 'n'; }
- }
-
- if(flip){ currSeq.reverseComplement(); } // should go last
-
- if(trashCode.length() == 0){
- currSeq.printSequence(outFASTA);
+ if (origSeq != "") {
+ int group;
+ string trashCode = "";
+
+ if(qFileName != ""){
+ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
+ else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
+ if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) {
+ success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+ }
+ if(!success) { trashCode += 'q'; }
+ }
if(barcodes.size() != 0){
- outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
- if(allFiles){
- currSeq.printSequence(*fastaFileNames[group]);
+ success = stripBarcode(currSeq, group);
+ if(!success){ trashCode += 'b'; }
+ }
+ if(numFPrimers != 0){
+ success = stripForward(currSeq);
+ if(!success){ trashCode += 'f'; }
+ }
+ if(numRPrimers != 0){
+ success = stripReverse(currSeq);
+ if(!success){ trashCode += 'r'; }
+ }
+ if(minLength > 0 || maxLength > 0){
+ success = cullLength(currSeq);
+ if(!success){ trashCode += 'l'; }
+ }
+ if(maxHomoP > 0){
+ success = cullHomoP(currSeq);
+ if(!success){ trashCode += 'h'; }
+ }
+ if(maxAmbig != -1){
+ success = cullAmbigs(currSeq);
+ if(!success){ trashCode += 'n'; }
+ }
+
+ if(flip){ currSeq.reverseComplement(); } // should go last
+
+ if(trashCode.length() == 0){
+ currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed.
+ currSeq.printSequence(outFASTA);
+ if(barcodes.size() != 0){
+ outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
+
+ if(allFiles){
+ currSeq.printSequence(*fastaFileNames[group]);
+ }
}
}
- }
- else{
- currSeq.setName(currSeq.getName() + '|' + trashCode);
- currSeq.setUnaligned(origSeq);
- currSeq.printSequence(scrapFASTA);
+ else{
+ currSeq.setName(currSeq.getName() + '|' + trashCode);
+ currSeq.setUnaligned(origSeq);
+ currSeq.printSequence(scrapFASTA);
+ }
}
gobble(inFASTA);
}
outFASTA.close();
scrapFASTA.close();
outGroups.close();
+ if(qFileName != "") { qFile.close(); }
for(int i=0;i<fastaFileNames.size();i++){
fastaFileNames[i]->close();
string name;
qFile >> name;
- if (name.length() != 0) { if(name.substr(1) != seq.getName()) { mothurOut("sequence name mismatch btwn fasta and qual file"); mothurOutEndLine(); } }
+ if (name[0] == '>') { if(name.substr(1) != seq.getName()) { mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); mothurOutEndLine(); } }
+
while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } }
float score;
average += score;
}
average /= seqLength;
-
+
if(average >= qAverage) { success = 1; }
else { success = 0; }
}
//***************************************************************************************************************
-
-