X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimflowscommand.cpp;h=cc49755d2ced89f31aa60258de2abc18838e9967;hb=1a5c2356c1b955c6ec024b2baf9f46377ee7c72e;hp=c2ea5346125d31ed6779b1b7f20c9280a79678c1;hpb=d0051dc9939d3477bd92b42c86bcd3eda743b955;p=mothur.git diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index c2ea534..cc49755 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -14,22 +14,24 @@ //********************************************************************************************************************** vector TrimFlowsCommand::setParameters(){ try { - CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop); - CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows); - CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal); - CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise); - CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles); - CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder); - CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow",false,true,true); parameters.push_back(pflow); + CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos); + CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); + CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); + CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); + CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); + CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); + CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal); + CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise); + CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles); + CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder); + CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -54,7 +56,23 @@ string TrimFlowsCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string TrimFlowsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "flow") { pattern = "[filename],[tag],flow"; } + else if (type == "fasta") { pattern = "[filename],flow.fasta"; } + else if (type == "file") { pattern = "[filename],flow.files"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** TrimFlowsCommand::TrimFlowsCommand(){ @@ -64,6 +82,7 @@ TrimFlowsCommand::TrimFlowsCommand(){ vector tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["file"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand"); @@ -101,6 +120,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { vector tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["file"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -149,10 +169,10 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { string temp; temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; } - convert(temp, minFlows); + m->mothurConvert(temp, minFlows); temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; } - convert(temp, maxFlows); + m->mothurConvert(temp, maxFlows); temp = validParameter.validFile(parameters, "oligos", true); @@ -164,28 +184,35 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { else if(m->isTrue(temp)) { fasta = 1; } temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found"){ temp = "9"; } - convert(temp, maxHomoP); + m->mothurConvert(temp, maxHomoP); temp = validParameter.validFile(parameters, "signal", false); if (temp == "not found"){ temp = "0.50"; } - convert(temp, signal); + m->mothurConvert(temp, signal); temp = validParameter.validFile(parameters, "noise", false); if (temp == "not found"){ temp = "0.70"; } - convert(temp, noise); + m->mothurConvert(temp, noise); temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; } - convert(temp, bdiffs); + m->mothurConvert(temp, bdiffs); temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; } - convert(temp, pdiffs); + m->mothurConvert(temp, pdiffs); + + temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; } + m->mothurConvert(temp, ldiffs); + + temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; } + m->mothurConvert(temp, sdiffs); + + temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } + m->mothurConvert(temp, tdiffs); - temp = validParameter.validFile(parameters, "tdiffs", false); - if (temp == "not found"){ int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); } - convert(temp, tdiffs); - if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } + if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); flowOrder = validParameter.validFile(parameters, "order", false); if (flowOrder == "not found"){ flowOrder = "TACG"; } @@ -198,6 +225,8 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { numFPrimers = 0; numRPrimers = 0; + numLinkers = 0; + numSpacers = 0; } } @@ -214,19 +243,23 @@ int TrimFlowsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim.flow"; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + string fastaFileName = getOutputFileName("fasta",variables); + if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); } + + variables["[tag]"] = "trim"; + string trimFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName); - string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap.flow"; + variables["[tag]"] = "scrap"; + string scrapFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName); - string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.fasta"; - if(fasta){ - outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); - } + vector flowFilePos; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) flowFilePos = getFlowFileBreaks(); for (int i = 0; i < (flowFilePos.size()-1); i++) { lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)])); @@ -273,7 +306,8 @@ int TrimFlowsCommand::execute(){ if(allFiles){ set namesAlreadyProcessed; - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files"; + variables["[tag]"] = ""; + flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); for(int i=0;imothurRemove(barcodePrimerComboFileNames[i][j]); } else{ - output << barcodePrimerComboFileNames[i][j] << endl; + output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; outputNames.push_back(barcodePrimerComboFileNames[i][j]); outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); } @@ -306,14 +340,15 @@ int TrimFlowsCommand::execute(){ output.close(); } else{ - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files"; + variables["[tag]"] = ""; + flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); - output << trimFlowFileName << endl; + output << m->getFullPathName(trimFlowFileName) << endl; output.close(); } - outputTypes["flow.files"].push_back(flowFilesFileName); + outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); // set fasta file as new current fastafile @@ -378,12 +413,10 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int count = 0; bool moreSeqs = 1; - TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer); + TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); while(moreSeqs) { - //cout << "driver " << count << endl; - - + if (m->control_pressed) { break; } int success = 1; @@ -391,11 +424,9 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN string trashCode = ""; flowData.getNext(flowFile); - //cout << "driver good bit " << flowFile.good() << endl; flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); - if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; @@ -404,12 +435,28 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int primerIndex = 0; int barcodeIndex = 0; + if(numLinkers != 0){ + success = trimOligos.stripLinker(currSeq); + if(success > ldiffs) { trashCode += 'k'; } + else{ currentSeqDiffs += success; } + + } + + if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); } + if(barcodes.size() != 0){ success = trimOligos.stripBarcode(currSeq, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqDiffs += success; } } + if(numSpacers != 0){ + success = trimOligos.stripSpacer(currSeq); + if(success > sdiffs) { trashCode += 's'; } + else{ currentSeqDiffs += success; } + + } + if(numFPrimers != 0){ success = trimOligos.stripForward(currSeq, primerIndex); if(success > pdiffs) { trashCode += 'f'; } @@ -447,7 +494,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN //report progress if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) unsigned long long pos = flowFile.tellg(); if ((pos == -1) || (pos >= line->end)) { break; } @@ -521,9 +568,8 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ } else if(type == "REVERSE"){ - Sequence oligoRC("reverse", oligo); - oligoRC.reverseComplement(); - revPrimer.push_back(oligoRC.getUnaligned()); + string oligoRC = reverseOligo(oligo); + revPrimer.push_back(oligoRC); } else if(type == "BARCODE"){ oligosFile >> group; @@ -534,6 +580,10 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ barcodes[oligo]=indexBarcode; indexBarcode++; barcodeNameVector.push_back(group); + }else if(type == "LINKER"){ + linker.push_back(oligo); + }else if(type == "SPACER"){ + spacer.push_back(oligo); } else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); @@ -574,9 +624,13 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ string comboGroupName = ""; string fileName = ""; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + if(primerName == ""){ comboGroupName = barcodeNameVector[itBar->second]; - fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow"; + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); } else{ if(barcodeName == ""){ @@ -585,7 +639,8 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ else{ comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; } - fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow"; + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); } outFlowFileNames[itBar->second][itPrimer->second] = fileName; @@ -599,6 +654,8 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ numFPrimers = primers.size(); numRPrimers = revPrimer.size(); + numLinkers = linker.size(); + numSpacers = spacer.size(); } catch(exception& e) { @@ -606,6 +663,47 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ exit(1); } } +//********************************************************************/ +string TrimFlowsCommand::reverseOligo(string oligo){ + try { + string reverse = ""; + + for(int i=oligo.length()-1;i>=0;i--){ + + if(oligo[i] == 'A') { reverse += 'T'; } + else if(oligo[i] == 'T'){ reverse += 'A'; } + else if(oligo[i] == 'U'){ reverse += 'A'; } + + else if(oligo[i] == 'G'){ reverse += 'C'; } + else if(oligo[i] == 'C'){ reverse += 'G'; } + + else if(oligo[i] == 'R'){ reverse += 'Y'; } + else if(oligo[i] == 'Y'){ reverse += 'R'; } + + else if(oligo[i] == 'M'){ reverse += 'K'; } + else if(oligo[i] == 'K'){ reverse += 'M'; } + + else if(oligo[i] == 'W'){ reverse += 'W'; } + else if(oligo[i] == 'S'){ reverse += 'S'; } + + else if(oligo[i] == 'B'){ reverse += 'V'; } + else if(oligo[i] == 'V'){ reverse += 'B'; } + + else if(oligo[i] == 'D'){ reverse += 'H'; } + else if(oligo[i] == 'H'){ reverse += 'D'; } + + else { reverse += 'N'; } + } + + + return reverse; + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "reverseOligo"); + exit(1); + } +} + /**************************************************************************************************/ vector TrimFlowsCommand::getFlowFileBreaks() { @@ -688,7 +786,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim processIDS.clear(); int exitCommand = 1; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; //loop through and create all the processes you want