X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=70b1b4ae8a0c931ff3f84a56d4dd68fa1edd537a;hb=9ada98592a54c82d08f3d46c9b1d8c3e472a922d;hp=455cdc29aa47ecb1ca26a7f9b4021bef151d6314;hpb=1244c4907c07baea86b0f0676d098a29d2e95a39;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 455cdc2..70b1b4a 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -21,7 +21,8 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles"}; + string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + "qthreshold", "qaverage", "allfiles", "qtrim", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -29,17 +30,54 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ map parameters = parser.getParameters(); ValidParameters validParameter; + map::iterator it; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + + it = parameters.find("oligos"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["oligos"] = inputDir + it->second; } + } + + it = parameters.find("qfile"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["qfile"] = inputDir + it->second; } + } + } + + //check for required parameters fastaFile = validParameter.validFile(parameters, "fasta", true); if (fastaFile == "not found") { mothurOut("fasta is a required parameter for the screen.seqs command."); mothurOutEndLine(); abort = true; } else if (fastaFile == "not open") { abort = true; } - + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += hasPath(fastaFile); //if user entered a file with a path then preserve it + } //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -67,11 +105,14 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ temp = validParameter.validFile(parameters, "qfile", true); if (temp == "not found") { qFileName = ""; } - else if(temp == "not open") { abort = 0; } + else if(temp == "not open") { abort = true; } else { qFileName = temp; } temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; } convert(temp, qThreshold); + + temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; } + qtrim = isTrue(temp); temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; } convert(temp, qAverage); @@ -104,7 +145,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option){ void TrimSeqsCommand::help(){ try { mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); - mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength.\n"); + mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, qtrim and allfiles.\n"); mothurOut("The fasta parameter is required.\n"); mothurOut("The flip parameter .... The default is 0.\n"); mothurOut("The oligos parameter .... The default is "".\n"); @@ -112,11 +153,17 @@ void TrimSeqsCommand::help(){ mothurOut("The maxhomop parameter .... The default is 0.\n"); mothurOut("The minlength parameter .... The default is 0.\n"); mothurOut("The maxlength parameter .... The default is 0.\n"); + mothurOut("The qfile parameter .....\n"); + mothurOut("The qthreshold parameter .... The default is 0.\n"); + mothurOut("The qaverage parameter .... The default is 0.\n"); + mothurOut("The allfiles parameter .... The default is F.\n"); + mothurOut("The qtrim parameter .... The default is F.\n"); mothurOut("The trim.seqs command should be in the following format: \n"); mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n"); mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n"); mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n"); - mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"); + mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n"); } catch(exception& e) { @@ -136,86 +183,97 @@ int TrimSeqsCommand::execute(){ try{ if (abort == true) { return 0; } - + + numFPrimers = 0; //this needs to be initialized + numRPrimers = 0; + ifstream inFASTA; openInputFile(fastaFile, inFASTA); ofstream outFASTA; - string trimSeqFile = getRootName(fastaFile) + "trim.fasta"; + string trimSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "trim.fasta"; openOutputFile(trimSeqFile, outFASTA); ofstream outGroups; vector fastaFileNames; if(oligoFile != ""){ - string groupFile = getRootName(fastaFile) + "groups"; + string groupFile = outputDir + getRootName(getSimpleName(fastaFile)) + "groups"; openOutputFile(groupFile, outGroups); getOligos(fastaFileNames); } ofstream scrapFASTA; - string scrapSeqFile = getRootName(fastaFile) + "scrap.fasta"; + string scrapSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "scrap.fasta"; openOutputFile(scrapSeqFile, scrapFASTA); ifstream qFile; if(qFileName != "") { openInputFile(qFileName, qFile); } bool success; - + while(!inFASTA.eof()){ Sequence currSeq(inFASTA); + string origSeq = currSeq.getUnaligned(); - int group; - string trashCode = ""; - - if(qFileName != ""){ - if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } - else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); } - if(!success) { trashCode += 'q'; } - } - if(barcodes.size() != 0){ - - success = stripBarcode(currSeq, group); - if(!success){ trashCode += 'b'; } - } - if(numFPrimers != 0){ - success = stripForward(currSeq); - if(!success){ trashCode += 'f'; } - } - if(numRPrimers != 0){ - success = stripReverse(currSeq); - if(!success){ trashCode += 'r'; } - } - if(minLength > 0 || maxLength > 0){ - success = cullLength(currSeq); - if ((currSeq.getUnaligned().length() > 300) && (success)) { cout << "too long " << currSeq.getUnaligned().length() << endl; } - if(!success){ trashCode += 'l'; } - } - if(maxHomoP > 0){ - success = cullHomoP(currSeq); - if(!success){ trashCode += 'h'; } - } - if(maxAmbig != -1){ - success = cullAmbigs(currSeq); - if(!success){ trashCode += 'n'; } - } - - if(flip){ currSeq.reverseComplement(); } // should go last + if (origSeq != "") { + int group; + string trashCode = ""; + + if(qFileName != ""){ + if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); } + else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); } + if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { + success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap + } + if(!success) { trashCode += 'q'; } + } - if(trashCode.length() == 0){ - currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed. - currSeq.printSequence(outFASTA); if(barcodes.size() != 0){ - outGroups << currSeq.getName() << '\t' << groupVector[group] << endl; + success = stripBarcode(currSeq, group); + if(!success){ trashCode += 'b'; } + } + + if(numFPrimers != 0){ + success = stripForward(currSeq); + if(!success){ trashCode += 'f'; } + } - if(allFiles){ - currSeq.printSequence(*fastaFileNames[group]); + if(numRPrimers != 0){ + success = stripReverse(currSeq); + if(!success){ trashCode += 'r'; } + } + + if(minLength > 0 || maxLength > 0){ + success = cullLength(currSeq); + if(!success){ trashCode += 'l'; } + } + if(maxHomoP > 0){ + success = cullHomoP(currSeq); + if(!success){ trashCode += 'h'; } + } + if(maxAmbig != -1){ + success = cullAmbigs(currSeq); + if(!success){ trashCode += 'n'; } + } + + if(flip){ currSeq.reverseComplement(); } // should go last + + if(trashCode.length() == 0){ + currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed. + currSeq.printSequence(outFASTA); + if(barcodes.size() != 0){ + outGroups << currSeq.getName() << '\t' << groupVector[group] << endl; + + if(allFiles){ + currSeq.printSequence(*fastaFileNames[group]); + } } } - } - else{ - currSeq.setName(currSeq.getName() + '|' + trashCode); - currSeq.setUnaligned(origSeq); - currSeq.printSequence(scrapFASTA); + else{ + currSeq.setName(currSeq.getName() + '|' + trashCode); + currSeq.setUnaligned(origSeq); + currSeq.printSequence(scrapFASTA); + } } gobble(inFASTA); } @@ -234,7 +292,7 @@ int TrimSeqsCommand::execute(){ string seqName; openInputFile(getRootName(fastaFile) + groupVector[i] + ".fasta", inFASTA); ofstream outGroups; - openOutputFile(getRootName(fastaFile) + groupVector[i] + ".groups", outGroups); + openOutputFile(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups", outGroups); while(!inFASTA.eof()){ if(inFASTA.get() == '>'){ @@ -246,8 +304,7 @@ int TrimSeqsCommand::execute(){ outGroups.close(); inFASTA.close(); } - - + return 0; } catch(exception& e) { @@ -286,7 +343,9 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec){ forPrimer.push_back(oligo); } else if(type == "reverse"){ - revPrimer.push_back(oligo); + Sequence oligoRC("reverse", oligo); + oligoRC.reverseComplement(); + revPrimer.push_back(oligoRC.getUnaligned()); } else if(type == "barcode"){ inOligos >> group; @@ -294,7 +353,7 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec){ groupVector.push_back(group); if(allFiles){ - outFASTAVec.push_back(new ofstream((getRootName(fastaFile) + group + ".fasta").c_str(), ios::ate)); + outFASTAVec.push_back(new ofstream((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta").c_str(), ios::ate)); } } } @@ -391,7 +450,7 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq){ } if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){ - seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length())); + seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length())); success = 1; break; }