X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=prcseqscommand.cpp;h=6b73d440778dce57e39cfdb1753277d9f512a94b;hb=078d7227da8dda9ae8620822fa32d51ec2706fc3;hp=d950b48eea4ec778371a439c1df89272fec03573;hpb=a33a385cc5b7481488f92f794425f01fbf40a543;p=mothur.git diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index d950b48..6b73d44 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -20,9 +20,9 @@ vector PcrSeqsCommand::setParameters(){ CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart); CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend); CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer); + CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -39,8 +39,15 @@ vector PcrSeqsCommand::setParameters(){ string PcrSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The pcr.seqs command reads a fasta file ...\n"; - + helpString += "The pcr.seqs command reads a fasta file.\n"; + helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n"; + helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n"; + helpString += "The start parameter allows you to provide a starting position to trim to.\n"; + helpString += "The end parameter allows you to provide a ending position to trim from.\n"; + helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n"; + helpString += "The processors parameter allows you to use multiple processors.\n"; + helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n"; + helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n"; return helpString; @@ -51,7 +58,30 @@ string PcrSeqsCommand::getHelpString(){ } } - +//********************************************************************************************************************** +string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "pcr.fasta"; } + else if (type == "taxonomy") { outputFileName = "pcr" + m->getExtension(inputName); } + else if (type == "group") { outputFileName = "pcr" + m->getExtension(inputName); } + else if (type == "name") { outputFileName = "pcr" + m->getExtension(inputName); } + else if (type == "accnos") { outputFileName = "bad.accnos"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** PcrSeqsCommand::PcrSeqsCommand(){ @@ -158,8 +188,6 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { } - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } //check for required parameters fastafile = validParameter.validFile(parameters, "fasta", true); @@ -170,13 +198,18 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { }else if (fastafile == "not open") { fastafile = ""; abort = true; } else { m->setFastaFile(fastafile); } - + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; temp = validParameter.validFile(parameters, "keepprimer", false); if (temp == "not found") { temp = "f"; } keepprimer = m->isTrue(temp); + temp = validParameter.validFile(parameters, "keepdots", false); if (temp == "not found") { temp = "t"; } + keepdots = m->isTrue(temp); + temp = validParameter.validFile(parameters, "oligos", true); if (temp == "not found"){ oligosfile = ""; } else if(temp == "not open"){ oligosfile = ""; abort = true; } @@ -200,10 +233,7 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { if (taxfile == "not found"){ taxfile = ""; } else if(taxfile == "not open"){ taxfile = ""; abort = true; } else { m->setTaxonomyFile(taxfile); } - - temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; } - m->mothurConvert(temp, pdiffs); - + temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; } m->mothurConvert(temp, start); @@ -258,11 +288,11 @@ int PcrSeqsCommand::execute(){ string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } - string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta"; + string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta"); outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); - string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta"; - outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); + string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta"); + length = 0; if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; } @@ -297,7 +327,11 @@ int PcrSeqsCommand::execute(){ if (m->control_pressed) { return 0; } - writeAccnos(badNames); + //don't write or keep if blank + if (badNames.size() != 0) { writeAccnos(badNames); } + if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile); } + else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (namefile != "") { readName(badNames); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -434,7 +468,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); } // Allocate memory for thread data. - pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, start, end, length, lines[i].start, lines[i].end); + pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end); pDataArray.push_back(tempPcr); //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier @@ -516,8 +550,14 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { currSeq.filterToPos(mapAligned[primerEnd]); } - else { currSeq.filterToPos(mapAligned[primerStart]); } + if (!keepprimer) { + if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); } + else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); } + } + else { + if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); } + else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); } + } }else { if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } @@ -533,8 +573,14 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { currSeq.filterFromPos(mapAligned[primerStart]); } - else { currSeq.filterFromPos(mapAligned[primerEnd]); } + if (!keepprimer) { + if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); } + else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); } + } + else { + if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); } + else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); } + } } else { if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); } @@ -549,24 +595,43 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else if (currSeq.getAligned().length() != length) { m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; }else { - currSeq.filterToPos(start); - currSeq.filterFromPos(end); + if (keepdots) { + currSeq.filterToPos(start); + currSeq.filterFromPos(end); + }else { + string seqString = currSeq.getAligned().substr(0, end); + seqString = seqString.substr(start); + currSeq.setAligned(seqString); + } } }else{ //using start and end to trim //make sure the seqs are aligned lengths.insert(currSeq.getAligned().length()); if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } else { - if (start != -1) { currSeq.filterToPos(start); } if (end != -1) { if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; } else { - currSeq.filterFromPos(end); + if (keepdots) { currSeq.filterFromPos(end); } + else { + string seqString = currSeq.getAligned().substr(0, end); + currSeq.setAligned(seqString); + } + } + } + if (start != -1) { + if (keepdots) { currSeq.filterToPos(start); } + else { + string seqString = currSeq.getAligned().substr(start); + currSeq.setAligned(seqString); } } } } - + + //trimming removed all bases + if (currSeq.getUnaligned() == "") { goodSeq = false; } + if(goodSeq == 1) { currSeq.printSequence(goodFile); } else { badSeqNames.insert(currSeq.getName()); @@ -815,7 +880,7 @@ int PcrSeqsCommand::writeAccnos(set badNames){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos"; + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos"); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); ofstream out; @@ -877,7 +942,7 @@ int PcrSeqsCommand::readName(set& names){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(namefile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile); + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile); ofstream out; m->openOutputFile(outputFileName, out); @@ -935,7 +1000,7 @@ int PcrSeqsCommand::readGroup(set names){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile); + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile); ofstream out; m->openOutputFile(outputFileName, out); @@ -982,7 +1047,7 @@ int PcrSeqsCommand::readTax(set names){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile); + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile); ofstream out; m->openOutputFile(outputFileName, out);