X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimflowscommand.cpp;h=6a3535fb4af55ee1b217c9a18c4fc83e08db7539;hb=28bcfc4a41b8b82f66636587e0d4d355d07cbdd1;hp=d87a8df2aa5df56b4e35442255108d527be3a843;hpb=09207a049d2ec77f56ac98960c6525be084fa090;p=mothur.git diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index d87a8df..6a3535f 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -21,7 +21,9 @@ vector TrimFlowsCommand::setParameters(){ CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); + CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); + CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal); CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise); @@ -54,7 +56,28 @@ string TrimFlowsCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string TrimFlowsCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "flow") { outputFileName = "flow"; } + else if (type == "fasta") { outputFileName = "flow.fasta"; } + else if (type == "file") { outputFileName = "flow.files"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** TrimFlowsCommand::TrimFlowsCommand(){ @@ -64,6 +87,7 @@ TrimFlowsCommand::TrimFlowsCommand(){ vector tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["file"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand"); @@ -101,6 +125,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { vector tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["file"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -178,10 +203,17 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; } m->mothurConvert(temp, pdiffs); - temp = validParameter.validFile(parameters, "tdiffs", false); - if (temp == "not found"){ int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); } + temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; } + m->mothurConvert(temp, ldiffs); + + temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; } + m->mothurConvert(temp, sdiffs); + + temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } m->mothurConvert(temp, tdiffs); - if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } + + if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); @@ -198,6 +230,8 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { numFPrimers = 0; numRPrimers = 0; + numLinkers = 0; + numSpacers = 0; } } @@ -214,19 +248,19 @@ int TrimFlowsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim.flow"; + string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim." + getOutputFileNameTag("flow"); outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName); - string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap.flow"; + string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap." + getOutputFileNameTag("flow");; outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName); - string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.fasta"; + string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta"); if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); } vector flowFilePos; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) flowFilePos = getFlowFileBreaks(); for (int i = 0; i < (flowFilePos.size()-1); i++) { lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)])); @@ -273,7 +307,7 @@ int TrimFlowsCommand::execute(){ if(allFiles){ set namesAlreadyProcessed; - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files"; + flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file"); m->openOutputFile(flowFilesFileName, output); for(int i=0;igetRootName(m->getSimpleName(flowFileName)) + "flow.files"; + flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file"); m->openOutputFile(flowFilesFileName, output); output << m->getFullPathName(trimFlowFileName) << endl; @@ -378,7 +412,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int count = 0; bool moreSeqs = 1; - TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer); + TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); while(moreSeqs) { @@ -389,11 +423,9 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN string trashCode = ""; flowData.getNext(flowFile); - //cout << "driver good bit " << flowFile.good() << endl; flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); - if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; @@ -402,12 +434,26 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int primerIndex = 0; int barcodeIndex = 0; + if(numLinkers != 0){ + success = trimOligos.stripLinker(currSeq); + if(success > ldiffs) { trashCode += 'k'; } + else{ currentSeqDiffs += success; } + + } + if(barcodes.size() != 0){ success = trimOligos.stripBarcode(currSeq, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqDiffs += success; } } + if(numSpacers != 0){ + success = trimOligos.stripSpacer(currSeq); + if(success > sdiffs) { trashCode += 's'; } + else{ currentSeqDiffs += success; } + + } + if(numFPrimers != 0){ success = trimOligos.stripForward(currSeq, primerIndex); if(success > pdiffs) { trashCode += 'f'; } @@ -445,7 +491,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN //report progress if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) unsigned long long pos = flowFile.tellg(); if ((pos == -1) || (pos >= line->end)) { break; } @@ -519,9 +565,8 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ } else if(type == "REVERSE"){ - Sequence oligoRC("reverse", oligo); - oligoRC.reverseComplement(); - revPrimer.push_back(oligoRC.getUnaligned()); + string oligoRC = reverseOligo(oligo); + revPrimer.push_back(oligoRC); } else if(type == "BARCODE"){ oligosFile >> group; @@ -532,6 +577,10 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ barcodes[oligo]=indexBarcode; indexBarcode++; barcodeNameVector.push_back(group); + }else if(type == "LINKER"){ + linker.push_back(oligo); + }else if(type == "SPACER"){ + spacer.push_back(oligo); } else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); @@ -597,6 +646,8 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ numFPrimers = primers.size(); numRPrimers = revPrimer.size(); + numLinkers = linker.size(); + numSpacers = spacer.size(); } catch(exception& e) { @@ -604,6 +655,47 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ exit(1); } } +//********************************************************************/ +string TrimFlowsCommand::reverseOligo(string oligo){ + try { + string reverse = ""; + + for(int i=oligo.length()-1;i>=0;i--){ + + if(oligo[i] == 'A') { reverse += 'T'; } + else if(oligo[i] == 'T'){ reverse += 'A'; } + else if(oligo[i] == 'U'){ reverse += 'A'; } + + else if(oligo[i] == 'G'){ reverse += 'C'; } + else if(oligo[i] == 'C'){ reverse += 'G'; } + + else if(oligo[i] == 'R'){ reverse += 'Y'; } + else if(oligo[i] == 'Y'){ reverse += 'R'; } + + else if(oligo[i] == 'M'){ reverse += 'K'; } + else if(oligo[i] == 'K'){ reverse += 'M'; } + + else if(oligo[i] == 'W'){ reverse += 'W'; } + else if(oligo[i] == 'S'){ reverse += 'S'; } + + else if(oligo[i] == 'B'){ reverse += 'V'; } + else if(oligo[i] == 'V'){ reverse += 'B'; } + + else if(oligo[i] == 'D'){ reverse += 'H'; } + else if(oligo[i] == 'H'){ reverse += 'D'; } + + else { reverse += 'N'; } + } + + + return reverse; + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "reverseOligo"); + exit(1); + } +} + /**************************************************************************************************/ vector TrimFlowsCommand::getFlowFileBreaks() { @@ -686,7 +778,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim processIDS.clear(); int exitCommand = 1; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; //loop through and create all the processes you want