From fefd5ee1517abd3bc38b469cb2dffc85a1571c7e Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Wed, 9 Apr 2014 09:13:12 -0400 Subject: [PATCH] added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info, and make.contigs. --- Mothur.xcodeproj/project.pbxproj | 6 + aligncommand.cpp | 46 ++- getmimarkspackagecommand.cpp | 171 +--------- getmimarkspackagecommand.h | 5 +- makecontigscommand.cpp | 418 ++++++++++------------- makecontigscommand.h | 144 +++++--- mothur.h | 10 + mothurout.cpp | 1 + mothurout.h | 1 + oligos.cpp | 556 +++++++++++++++++++++++++++++++ oligos.h | 92 +++++ parsefastaqcommand.cpp | 484 +++++++++------------------ parsefastaqcommand.h | 15 +- pcrseqscommand.h | 47 ++- prcseqscommand.cpp | 185 +++------- qualityscores.cpp | 19 +- qualityscores.h | 1 + sffinfocommand.cpp | 271 +++++++-------- sffinfocommand.h | 14 +- sracommand.cpp | 247 +++----------- sracommand.h | 7 +- trimflowscommand.cpp | 375 ++++++++++----------- trimflowscommand.h | 45 +-- trimoligos.cpp | 449 ++++++++++++++++++++++++- trimoligos.h | 11 +- trimseqscommand.cpp | 332 +++++++++++------- trimseqscommand.h | 123 ++----- 27 files changed, 2322 insertions(+), 1753 deletions(-) create mode 100644 oligos.cpp create mode 100644 oligos.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 0448c02..57b6624 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -10,6 +10,7 @@ 219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */; }; 219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; }; 483C952E188F0CAD0035E7B7 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */; }; + 4893DE2918EEF28100C615DF /* oligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4893DE2818EEF28100C615DF /* oligos.cpp */; }; 48A85BAD18E1AF2000199B6F /* getmimarkspackagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A85BAC18E1AF2000199B6F /* getmimarkspackagecommand.cpp */; }; 48E981CF189C38FB0042BE9D /* mergesfffilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E981CE189C38FB0042BE9D /* mergesfffilecommand.cpp */; }; 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; }; @@ -411,6 +412,8 @@ 219C1DE51559BCF2004209F9 /* getcoremicrobiomecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcoremicrobiomecommand.h; sourceTree = ""; }; 483C952C188F0C960035E7B7 /* sharedrjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedrjsd.h; sourceTree = ""; }; 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedrjsd.cpp; sourceTree = ""; }; + 4893DE2718EEF27300C615DF /* oligos.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = oligos.h; sourceTree = ""; }; + 4893DE2818EEF28100C615DF /* oligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = oligos.cpp; sourceTree = ""; }; 48A85BAB18E1AF0600199B6F /* getmimarkspackagecommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = getmimarkspackagecommand.h; sourceTree = ""; }; 48A85BAC18E1AF2000199B6F /* getmimarkspackagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getmimarkspackagecommand.cpp; sourceTree = ""; }; 48E981CD189C38E60042BE9D /* mergesfffilecommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = mergesfffilecommand.h; sourceTree = ""; }; @@ -1854,6 +1857,8 @@ A7E9B74012D37EC400DA6239 /* listvector.hpp */, A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */, A7E9B76012D37EC400DA6239 /* nameassignment.hpp */, + 4893DE2718EEF27300C615DF /* oligos.h */, + 4893DE2818EEF28100C615DF /* oligos.cpp */, A7E9B77712D37EC400DA6239 /* ordervector.cpp */, A7E9B77812D37EC400DA6239 /* ordervector.hpp */, A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */, @@ -2179,6 +2184,7 @@ A7E9B8DC12D37EC400DA6239 /* gotohoverlap.cpp in Sources */, A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */, A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */, + 4893DE2918EEF28100C615DF /* oligos.cpp in Sources */, A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */, A7E9B8E012D37EC400DA6239 /* hcluster.cpp in Sources */, A7E9B8E112D37EC400DA6239 /* hclustercommand.cpp in Sources */, diff --git a/aligncommand.cpp b/aligncommand.cpp index 3222f8c..bcc5967 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -110,7 +110,7 @@ AlignCommand::AlignCommand(){ //********************************************************************************************************************** AlignCommand::AlignCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; ReferenceDB* rdb = ReferenceDB::getInstance(); //allow user to run help @@ -860,6 +860,8 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s try { int num = 0; processIDS.resize(0); + bool recalc = false; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; @@ -882,12 +884,48 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s exit(0); }else { - m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); - for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } - exit(0); + m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process; + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + recalc = true; + break; } } + if (recalc) { + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + vector positions; + positions = m->divideFile(filename, processors); + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } + + num = 0; + processIDS.resize(0); + process = 1; + + while (process != processors) { + pid_t pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename); + + //pass numSeqs to parent + ofstream out; + string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + } + //do my part num = driver(lines[0], alignFileName, reportFileName, accnosFName, filename); diff --git a/getmimarkspackagecommand.cpp b/getmimarkspackagecommand.cpp index 955cb79..67194ce 100644 --- a/getmimarkspackagecommand.cpp +++ b/getmimarkspackagecommand.cpp @@ -9,6 +9,7 @@ #include "getmimarkspackagecommand.h" #include "groupmap.h" + //********************************************************************************************************************** vector GetMIMarksPackageCommand::setParameters(){ try { @@ -106,7 +107,7 @@ GetMIMarksPackageCommand::GetMIMarksPackageCommand(string option) { outputTypes["tsv"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { @@ -192,12 +193,10 @@ int GetMIMarksPackageCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - if (oligosfile != "") { readOligos(); } + if (oligosfile != "") { Oligos oligos(oligosfile); Groups = oligos.getGroupNames(); } else if (file != "") { readFile(); } else { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); } - for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } - if (outputDir == "") { outputDir += m->hasPath(inputfile); } map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); @@ -332,155 +331,7 @@ int GetMIMarksPackageCommand::execute(){ } } //*************************************************************************************************************** -int GetMIMarksPackageCommand::readOligos(){ - try { - ifstream inOligos; - m->openInputFile(oligosfile, inOligos); - - string type, oligo, roligo, group; - vector primerNameVector, barcodeNameVector; - set uniquePrimers; - set uniqueBarcodes; - - while(!inOligos.eof()){ - - inOligos >> type; - - if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } - - if(type[0] == '#'){ - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - } - else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } - - for(int i=0;igobble(inOligos); - - inOligos >> roligo; - - for(int i=0;i> group; - - //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs - //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info - - string temp = ""; - while (!inOligos.eof()) { - char c = inOligos.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { temp += c; } - } - - //then this is illumina data with 4 columns - if (temp != "") { - - string reverseBarcode = group; //reverseOligo(group); //reverse barcode - group = temp; - - barcodeNameVector.push_back(group); - }else { - barcodeNameVector.push_back(group); - } - } - } - m->gobble(inOligos); - } - inOligos.close(); - - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primerNameVector.push_back(""); - } - - - for(int i = 0; i < barcodeNameVector.size(); i++){ - for(int j = 0; j < primerNameVector.size(); j++){ - - string primerName = primerNameVector[j]; - string barcodeName = barcodeNameVector[i]; - - if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing - else if ((primerName == "") && (barcodeName == "")) { } - else { - string comboGroupName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[i]; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[j]; - } - else{ - comboGroupName = barcodeNameVector[i] + "." + primerNameVector[j]; - } - } - uniqueNames.insert(comboGroupName); - } - } - } - - - - if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } - - - return true; - - } - catch(exception& e) { - m->errorOut(e, "GetMIMarksPackageCommand", "readOligos"); - exit(1); - } -} -//********************************************************************************************************************** + // going to have to rework this to allow for other options -- /* file option 1 @@ -506,7 +357,7 @@ int GetMIMarksPackageCommand::readOligos(){ int GetMIMarksPackageCommand::readFile(){ try { - //vector theseFiles; + Oligos oligos; inputfile = file; ifstream in; @@ -534,6 +385,14 @@ int GetMIMarksPackageCommand::readFile(){ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); } + if (inputDir != "") { + string path = m->hasPath(thisFileName2); + if (path == "") { thisFileName2 = inputDir + thisFileName2; } + + path = m->hasPath(thisFileName1); + if (path == "") { thisFileName1 = inputDir + thisFileName1; } + } + //check to make sure both are able to be opened ifstream in2; int openForward = m->openInputFile(thisFileName1, in2, "noerror"); @@ -603,13 +462,15 @@ int GetMIMarksPackageCommand::readFile(){ if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos oligosfile = thisFileName2; if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); } - readOligos(); + oligos.read(oligosfile); }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read Groups.push_back(group); } } in.close(); + Groups = oligos.getGroupNames(); + inputfile = file; return 0; diff --git a/getmimarkspackagecommand.h b/getmimarkspackagecommand.h index 8bd2e49..65219d7 100644 --- a/getmimarkspackagecommand.h +++ b/getmimarkspackagecommand.h @@ -10,6 +10,7 @@ #define Mothur_getmimarkspackagecommand_h #include "command.hpp" +#include "oligos.h" /**************************************************************************************************/ @@ -33,12 +34,10 @@ public: private: bool abort, requiredonly; - string oligosfile, groupfile, package, inputfile, file; + string oligosfile, groupfile, package, inputfile, file, inputDir; string outputDir; vector outputNames, Groups; - set uniqueNames; - int readOligos(); int readFile(); }; diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 15f002b..0d563fe 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -24,7 +24,7 @@ vector MakeContigsCommand::setParameters(){ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); - + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap); @@ -71,6 +71,7 @@ string MakeContigsCommand::getHelpString(){ //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"; helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n"; helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; @@ -153,7 +154,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { string path; @@ -378,6 +379,9 @@ MakeContigsCommand::MakeContigsCommand(string option) { abort=true; } + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); + //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); @@ -440,14 +444,17 @@ int MakeContigsCommand::execute(){ groupCounts.clear(); groupMap.clear(); vector > fastaFileNames; + map uniqueFastaNames;// so we don't add the same groupfile multiple times createOligosGroup = false; + oligos = new Oligos(); + numBarcodes = 0; numFPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0; string outputGroupFileName; map variables; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir = m->hasPath(filesToProcess[l][0][0]); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0])); variables["[tag]"] = ""; - if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); } + if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"], uniqueFastaNames); } if (createOligosGroup || createFileGroup) { outputGroupFileName = getOutputFileName("group",variables); } @@ -468,10 +475,10 @@ int MakeContigsCommand::execute(){ //remove temp fasta and qual files for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete oligos; return 0; } if(allFiles){ - map uniqueFastaNames;// so we don't add the same groupfile multiple times + // so we don't add the same groupfile multiple times map::iterator it; set namesToRemove; for(int i=0;iisBlank(fastaFileNames[i][j])){ m->mothurRemove(fastaFileNames[i][j]); namesToRemove.insert(fastaFileNames[i][j]); - }else{ - it = uniqueFastaNames.find(fastaFileNames[i][j]); - if (it == uniqueFastaNames.end()) { - uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; - } + uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print } } } @@ -502,8 +505,8 @@ int MakeContigsCommand::execute(){ m->openInputFile(it->first, in); ofstream out; - string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first)); - thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(it->first)); + string thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); m->openOutputFile(thisGroupName, out); while (!in.eof()){ @@ -566,6 +569,7 @@ int MakeContigsCommand::execute(){ } } m->mothurOut("Done.\n"); + delete oligos; } m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n"); @@ -822,7 +826,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } - contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h); + contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, tempFASTAFileNames, oligosfile, reorient, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h); pDataArray.push_back(tempcontig); hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]); @@ -942,7 +946,12 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string m->openOutputFile(outputMisMatches, outMisMatch); outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; - TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes); + TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, oligos->getPairedPrimers(), oligos->getPairedBarcodes()); + + TrimOligos* rtrimOligos = NULL; + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos->getReorientedPairedPrimers(), oligos->getReorientedPairedBarcodes()); numBarcodes = oligos->getReorientedPairedBarcodes().size(); + } while ((!inFFasta.eof()) && (!inRFasta.eof())) { @@ -973,8 +982,15 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string int barcodeIndex = 0; int primerIndex = 0; + Sequence savedFSeq(fSeq.getName(), fSeq.getAligned()); Sequence savedRSeq(rSeq.getName(), rSeq.getAligned()); + Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned()); + QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL; + if (thisfqualfile != "") { + savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores()); + savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores()); + } - if(barcodes.size() != 0){ + if(numBarcodes != 0){ if (thisfqualfile != "") { if ((thisfindexfile != "") || (thisrindexfile != "")) { success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex); @@ -988,7 +1004,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string else{ currentSeqsDiffs += success; } } - if(primers.size() != 0){ + if(numFPrimers != 0){ if (thisfqualfile != "") { success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex); }else { @@ -1000,6 +1016,58 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + + if(numBarcodes != 0){ + if (thisfqualfile != "") { + if ((thisfindexfile != "") || (thisrindexfile != "")) { + thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex); + }else { + thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex); + } + }else { + thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex); + } + if(thisSuccess > bdiffs) { thisTrashCode += 'b'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if(numFPrimers != 0){ + if (thisfqualfile != "") { + thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex); + }else { + thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex); + } + if(thisSuccess > pdiffs) { thisTrashCode += 'f'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedFSeq.reverseComplement(); + savedRSeq.reverseComplement(); + fSeq.setAligned(savedFSeq.getAligned()); + rSeq.setAligned(savedRSeq.getAligned()); + if(thisfqualfile != ""){ + savedFQual->flipQScores(); savedRQual->flipQScores(); + fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores()); + } + }else { trashCode += "(" + thisTrashCode + ")"; } + } + + //flip the reverse reads rSeq.reverseComplement(); if (thisfqualfile != "") { rQual->flipQScores(); } @@ -1021,7 +1089,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (thisfqualfile != "") { scores1 = fQual->getQualityScores(); scores2 = rQual->getQualityScores(); - delete fQual; delete rQual; + delete fQual; delete rQual; delete savedFQual; delete savedRQual; } // if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; } @@ -1088,18 +1156,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (m->debug) { m->mothurOut(fSeq.getName()); } if (createOligosGroup) { - if(barcodes.size() != 0){ - string thisGroup = barcodeNameVector[barcodeIndex]; - if (primers.size() != 0) { - if (primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + primerNameVector[primerIndex]; - }else { - thisGroup = primerNameVector[primerIndex]; - } - } - } - + string thisGroup = oligos->getGroupName(barcodeIndex, primerIndex); if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); } int pos = thisGroup.find("ignore"); @@ -1110,8 +1167,6 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } else { groupCounts[it->first] ++; } }else { ignore = true; } - - } }else if (createFileGroup) { int pos = group.find("ignore"); if (pos == string::npos) { @@ -1123,7 +1178,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string }else { ignore = true; } } if (m->debug) { m->mothurOut("\n"); } - + if(allFiles && !ignore){ ofstream output; m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output); @@ -1159,6 +1214,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string inRQual.close(); } delete alignment; + if (reorient) { delete rtrimOligos; } if (m->control_pressed) { m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches); } @@ -1749,6 +1805,24 @@ vector< vector > MakeContigsCommand::readFileNames(string filename){ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); } + if (inputDir != "") { + string path = m->hasPath(forward); + if (path == "") { forward = inputDir + forward; } + + path = m->hasPath(reverse); + if (path == "") { reverse = inputDir + reverse; } + + if (findex != "") { + path = m->hasPath(findex); + if (path == "") { findex = inputDir + findex; } + } + + if (rindex != "") { + path = m->hasPath(rindex); + if (path == "") { rindex = inputDir + rindex; } + } + } + //check to make sure both are able to be opened ifstream in2; int openForward = m->openInputFile(forward, in2, "noerror"); @@ -1905,155 +1979,53 @@ vector< vector > MakeContigsCommand::readFileNames(string filename){ //illumina data requires paired forward and reverse data //BARCODE atgcatgc atgcatgc groupName //PRIMER atgcatgc atgcatgc groupName -//PRIMER atgcatgc atgcatgc -bool MakeContigsCommand::getOligos(vector >& fastaFileNames, string rootname){ +//PRIMER atgcatgc atgcatgc +bool MakeContigsCommand::getOligos(vector >& fastaFileNames, string rootname, map& fastaFile2Group){ try { - ifstream in; - m->openInputFile(oligosfile, in); - - ofstream test; - - string type, foligo, roligo, group; + if (m->debug) { m->mothurOut("[DEBUG]: oligosfile = " + oligosfile + "\n"); } - int indexPrimer = 0; - int indexBarcode = 0; - set uniquePrimers; - set uniqueBarcodes; - - while(!in.eof()){ - - in >> type; + bool allBlank = false; + oligos->read(oligosfile, false); + + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos->hasPairedBarcodes()) { + numFPrimers = oligos->getPairedPrimers().size(); + numBarcodes = oligos->getPairedBarcodes().size(); + }else { + m->mothurOut("[ERROR]: make.contigs requires paired barcodes and primers. You can set one end to NONE if you are using an index file.\n"); m->control_pressed = true; + } - if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } - - if(type[0] == '#'){ - while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(in); - } - else{ - m->gobble(in); - //make type case insensitive - for(int i=0;i> foligo; - - if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); } - - for(int i=0;igobble(in); - - in >> roligo; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); } - - group = ""; - - // get rest of line in case there is a primer name - while (!in.eof()) { - char c = in.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { group += c; } - } - - oligosPair newPrimer(foligo, roligo); - - if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } - - //check for repeat barcodes - string tempPair = foligo+roligo; - if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } - else { uniquePrimers.insert(tempPair); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } - primers[indexPrimer]=newPrimer; indexPrimer++; - primerNameVector.push_back(group); - }else if(type == "BARCODE"){ - m->gobble(in); - - in >> roligo; - - for(int i=0;imothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } } - - group = ""; - while (!in.eof()) { - char c = in.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { group += c; } - } - - if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } - - //check for repeat barcodes - string tempPair = foligo+roligo; - if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } - else { uniqueBarcodes.insert(tempPair); } - - barcodes[indexBarcode]=newPair; indexBarcode++; - barcodeNameVector.push_back(group); - }else if(type == "LINKER"){ - linker.push_back(foligo); - m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); - }else if(type == "SPACER"){ - spacer.push_back(foligo); - m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n"); - } - else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); } - } - m->gobble(in); - } - in.close(); - - if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; } - - //add in potential combos - if(barcodeNameVector.size() == 0){ - oligosPair temp("", ""); - barcodes[0] = temp; - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - oligosPair temp("", ""); - primers[0] = temp; - primerNameVector.push_back(""); - } - - fastaFileNames.resize(barcodeNameVector.size()); + if (m->control_pressed) { return false; } + + numLinkers = oligos->getLinkers().size(); + numSpacers = oligos->getSpacers().size(); + numRPrimers = oligos->getReversePrimers().size(); + if (numLinkers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); } + if (numSpacers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n"); } + + vector groupNames = oligos->getGroupNames(); + if (groupNames.size() == 0) { allFiles = 0; allBlank = true; } + + + fastaFileNames.resize(oligos->getBarcodeNames().size()); for(int i=0;igetPrimerNames().size();j++){ fastaFileNames[i].push_back(""); } } - - if(allFiles){ - set uniqueNames; //used to cleanup outputFileNames - for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->first]; - string barcodeName = barcodeNameVector[itBar->first]; + + if (allFiles) { + set uniqueNames; //used to cleanup outputFileNames + map barcodes = oligos->getPairedBarcodes(); + map primers = oligos->getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing - else { + string primerName = oligos->getPrimerName(itPrimer->first); + string barcodeName = oligos->getBarcodeName(itBar->first); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { string comboGroupName = ""; string fastaFileName = ""; string qualFileName = ""; @@ -2061,108 +2033,58 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri string countFileName = ""; if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->first]; - } - else{ + comboGroupName = barcodeName; + }else{ if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->first]; + comboGroupName = primerName; } else{ - comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; + comboGroupName = barcodeName + "." + primerName; } } ofstream temp; - fastaFileName = rootname + comboGroupName + ".fasta"; + map variables; + variables["[filename]"] = rootname; + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); if (uniqueNames.count(fastaFileName) == 0) { outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); uniqueNames.insert(fastaFileName); + fastaFile2Group[fastaFileName] = comboGroupName; } fastaFileNames[itBar->first][itPrimer->first] = fastaFileName; m->openOutputFile(fastaFileName, temp); temp.close(); + cout << fastaFileName << endl; } - } - } - } - - bool allBlank = true; - for (int i = 0; i < barcodeNameVector.size(); i++) { - if (barcodeNameVector[i] != "") { - allBlank = false; - break; - } - } - for (int i = 0; i < primerNameVector.size(); i++) { - if (primerNameVector[i] != "") { - allBlank = false; - break; - } - } - - if (allBlank) { - m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); - allFiles = false; - return false; - } - - return true; - + } + } + } + + if (allBlank) { + m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); + allFiles = false; + return false; + } + + return true; + } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "getOligos"); exit(1); } } -//********************************************************************/ -string MakeContigsCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } - - else { reverse += 'N'; } - } - - - return reverse; - } - catch(exception& e) { - m->errorOut(e, "MakeContigsCommand", "reverseOligo"); - exit(1); - } -} //********************************************************************************************************************** vector MakeContigsCommand::convertQual(string qual) { try { vector qualScores; bool negativeScores = false; - for (int i = 0; i < qual.length(); i++) { + for (int i = 0; i < qual.length(); i++) { int temp = 0; temp = int(qual[i]); diff --git a/makecontigscommand.h b/makecontigscommand.h index 1ea3835..67e86bc 100644 --- a/makecontigscommand.h +++ b/makecontigscommand.h @@ -18,6 +18,7 @@ #include "blastalign.hpp" #include "noalign.hpp" #include "trimoligos.h" +#include "oligos.h" struct fastqRead { vector scores; @@ -62,18 +63,12 @@ public: void help() { m->mothurOut(getHelpString()); } private: - bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk; - string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format; + bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk, reorient; + string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format, inputDir; float match, misMatch, gapOpen, gapExtend; - int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq; + int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers; vector outputNames; - - map barcodes; - map primers; - vector linker; - vector spacer; - vector primerNameVector; - vector barcodeNameVector; + Oligos* oligos; vector convertTable; map groupCounts; @@ -89,8 +84,7 @@ private: //bool checkReads(fastqRead&, fastqRead&, string, string); int createProcesses(vector< vector >, string, string, string, vector >, int); int driver(vector, string, string, string, vector >, int, string); - bool getOligos(vector >&, string); - string reverseOligo(string); + bool getOligos(vector >&, string, map&); vector getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques, bool); vector mergeReads(vector frReads, vector friReads, map& pairUniques); }; @@ -105,22 +99,19 @@ struct contigsData { string outputFasta; string outputScrapFasta; string outputMisMatches; - string align, group; + string align, group, oligosfile; vector files; vector > fastaFileNames; MothurOut* m; float match, misMatch, gapOpen, gapExtend; int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq; - bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap; + bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap, reorient; map groupCounts; map groupMap; - vector primerNameVector; - vector barcodeNameVector; - map barcodes; - map primers; + contigsData(){} - contigsData(string g, vector f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map br, map pr, vector > ffn, vectorbnv, vector pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) { + contigsData(string g, vector f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, vector > ffn, string olig, bool ro, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) { files = f; outputFasta = of; outputMisMatches = om; @@ -135,10 +126,7 @@ struct contigsData { count = 0; outputScrapFasta = osf; fastaFileNames = ffn; - barcodes = br; - primers = pr; - barcodeNameVector = bnv; - primerNameVector = pnv; + oligosfile = olig; pdiffs = pdf; bdiffs = bdf; tdiffs = tdf; @@ -148,6 +136,7 @@ struct contigsData { createFileGroup = cfg; threadID = tid; deltaq = delt; + reorient = ro; done=false; } }; @@ -204,7 +193,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; - TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes); + Oligos oligos; + if (pDataArray->oligosfile != "") { oligos.read(pDataArray->oligosfile); } + int numFPrimers = oligos.getPairedPrimers().size(); + int numBarcodes = oligos.getPairedBarcodes().size(); + + + TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); + TrimOligos* rtrimOligos = NULL; + if (pDataArray->reorient) { + rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } while ((!inFFasta.eof()) && (!inRFasta.eof())) { @@ -236,8 +235,15 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ int barcodeIndex = 0; int primerIndex = 0; + Sequence savedFSeq(fSeq.getName(), fSeq.getAligned()); Sequence savedRSeq(rSeq.getName(), rSeq.getAligned()); + Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned()); + QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL; + if (thisfqualfile != "") { + savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores()); + savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores()); + } - if(pDataArray->barcodes.size() != 0){ + if(numBarcodes != 0){ if (thisfqualfile != "") { if ((thisfindexfile != "") || (thisrindexfile != "")) { success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex); @@ -251,7 +257,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ else{ currentSeqsDiffs += success; } } - if(pDataArray->primers.size() != 0){ + if(numFPrimers != 0){ if (thisfqualfile != "") { success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex); }else { @@ -263,6 +269,57 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; } + if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + + if(numBarcodes != 0){ + if (thisfqualfile != "") { + if ((thisfindexfile != "") || (thisrindexfile != "")) { + thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex); + }else { + thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex); + } + }else { + thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex); + } + if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if(numFPrimers != 0){ + if (thisfqualfile != "") { + thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex); + }else { + thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex); + } + if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > pDataArray->tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedFSeq.reverseComplement(); + savedRSeq.reverseComplement(); + fSeq.setAligned(savedFSeq.getAligned()); + rSeq.setAligned(savedRSeq.getAligned()); + if(thisfqualfile != ""){ + savedFQual->flipQScores(); savedRQual->flipQScores(); + fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores()); + } + }else { trashCode += "(" + thisTrashCode + ")"; } + } + //flip the reverse reads rSeq.reverseComplement(); if (thisfqualfile != "") { rQual->flipQScores(); } @@ -284,7 +341,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if (thisfqualfile != "") { scores1 = fQual->getQualityScores(); scores2 = rQual->getQualityScores(); - delete fQual; delete rQual; + delete fQual; delete rQual; delete savedFQual; delete savedRQual; } int overlapStart = fSeq.getStartPos(); @@ -344,29 +401,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if(trashCode.length() == 0){ bool ignore = false; if (pDataArray->createOligosGroup) { - if(pDataArray->barcodes.size() != 0){ - string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; - if (pDataArray->primers.size() != 0) { - if (pDataArray->primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + pDataArray->primerNameVector[primerIndex]; - }else { - thisGroup = pDataArray->primerNameVector[primerIndex]; - } - } - } - - if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); } - - int pos = thisGroup.find("ignore"); - if (pos == string::npos) { - pDataArray->groupMap[fSeq.getName()] = thisGroup; + string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); + if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + pDataArray->groupMap[fSeq.getName()] = thisGroup; - map::iterator it = pDataArray->groupCounts.find(thisGroup); - if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } - else { pDataArray->groupCounts[it->first] ++; } - }else { ignore = true; } - } + map::iterator it = pDataArray->groupCounts.find(thisGroup); + if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } + else { pDataArray->groupCounts[it->first] ++; } + }else { ignore = true; } }else if (pDataArray->createFileGroup) { int pos = pDataArray->group.find("ignore"); if (pos == string::npos) { @@ -414,6 +459,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ inRQual.close(); } delete alignment; + if (pDataArray->reorient) { delete rtrimOligos; } pDataArray->done = true; if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputFasta); pDataArray->m->mothurRemove(pDataArray->outputMisMatches); pDataArray->m->mothurRemove(pDataArray->outputScrapFasta); } diff --git a/mothur.h b/mothur.h index 102b44d..b4fc3ce 100644 --- a/mothur.h +++ b/mothur.h @@ -210,6 +210,16 @@ struct distlinePair { int end; }; +/************************************************************/ +struct oligosPair { + string forward; + string reverse; + + oligosPair() { forward = ""; reverse = ""; } + oligosPair(string f, string r) : forward(f), reverse(r) {} + ~oligosPair() {} +}; + /************************************************************/ struct seqPriorityNode { int numIdentical; diff --git a/mothurout.cpp b/mothurout.cpp index baa7710..8143687 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -4157,6 +4157,7 @@ double MothurOut::min(vector& featureVector){ exit(1); } } + /**************************************************************************************************/ diff --git a/mothurout.h b/mothurout.h index a57fb13..0f34d02 100644 --- a/mothurout.h +++ b/mothurout.h @@ -166,6 +166,7 @@ class MothurOut { bool isSubset(vector, vector); //bigSet, subset int checkName(string&); map > parseClasses(string); + //math operation double max(vector&); //returns largest value in vector diff --git a/oligos.cpp b/oligos.cpp new file mode 100644 index 0000000..f058ad8 --- /dev/null +++ b/oligos.cpp @@ -0,0 +1,556 @@ +// +// oligos.cpp +// Mothur +// +// Created by Sarah Westcott on 4/4/14. +// Copyright (c) 2014 Schloss Lab. All rights reserved. +// + +#include "oligos.h" + +/**************************************************************************************************/ + +Oligos::Oligos(string o){ + try { + m = MothurOut::getInstance(); + hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true; + indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0; + oligosfile = o; + readOligos(); + if (pairedOligos) { + numBarcodes = pairedBarcodes.size(); + numFPrimers = pairedPrimers.size(); + }else { + numBarcodes = barcodes.size(); + numFPrimers = primers.size(); + } + } + catch(exception& e) { + m->errorOut(e, "Oligos", "Oligos"); + exit(1); + } +} +/**************************************************************************************************/ + +Oligos::Oligos(){ + try { + m = MothurOut::getInstance(); + hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true; + indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0; + numFPrimers = 0; numBarcodes = 0; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "Oligos"); + exit(1); + } +} +/**************************************************************************************************/ +int Oligos::read(string o){ + try { + oligosfile = o; + readOligos(); + if (pairedOligos) { + numBarcodes = pairedBarcodes.size(); + numFPrimers = pairedPrimers.size(); + }else { + numBarcodes = barcodes.size(); + numFPrimers = primers.size(); + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "read"); + exit(1); + } +} +/**************************************************************************************************/ +int Oligos::read(string o, bool reverse){ + try { + oligosfile = o; + reversePairs = reverse; + readOligos(); + if (pairedOligos) { + numBarcodes = pairedBarcodes.size(); + numFPrimers = pairedPrimers.size(); + }else { + numBarcodes = barcodes.size(); + numFPrimers = primers.size(); + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "read"); + exit(1); + } +} +//*************************************************************************************************************** + +int Oligos::readOligos(){ + try { + ifstream inOligos; + m->openInputFile(oligosfile, inOligos); + + string type, oligo, roligo, group; + + while(!inOligos.eof()){ + + inOligos >> type; + + if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } + + if(type[0] == '#'){ + while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + m->gobble(inOligos); + } + else{ + m->gobble(inOligos); + //make type case insensitive + for(int i=0;i> oligo; + + if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } + + for(int i=0;i::iterator itPrime = primers.find(oligo); + if (itPrime != primers.end()) { m->mothurOut("[WARNING]: primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } + + primers[oligo]=indexPrimer; indexPrimer++; + primerNameVector.push_back(group); + } + else if (type == "PRIMER"){ + m->gobble(inOligos); + + inOligos >> roligo; + + for(int i=0;idebug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } + + //check for repeat barcodes + string tempPair = oligo+roligo; + if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } + else { uniquePrimers.insert(tempPair); } + + if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } + + pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; + primerNameVector.push_back(group); + hasPPrimers = true; + } + else if(type == "REVERSE"){ + string oligoRC = reverseOligo(oligo); + revPrimer.push_back(oligoRC); + } + else if(type == "BARCODE"){ + inOligos >> group; + + //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs + //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info + + string temp = ""; + while (!inOligos.eof()) { + char c = inOligos.get(); + if (c == 10 || c == 13 || c == -1){ break; } + else if (c == 32 || c == 9){;} //space or tab + else { temp += c; } + } + + //then this is illumina data with 4 columns + if (temp != "") { + hasPBarcodes = true; + string reverseBarcode = group; //reverseOligo(group); //reverse barcode + group = temp; + + for(int i=0;idebug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } + + //check for repeat barcodes + string tempPair = oligo+reverseBarcode; + if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } + else { uniqueBarcodes.insert(tempPair); } + + pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; + barcodeNameVector.push_back(group); + }else { + //check for repeat barcodes + map::iterator itBar = barcodes.find(oligo); + if (itBar != barcodes.end()) { m->mothurOut("[WARNING]: barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + barcodes[oligo]=indexBarcode; indexBarcode++; + barcodeNameVector.push_back(group); + } + }else if(type == "LINKER"){ + linker.push_back(oligo); + }else if(type == "SPACER"){ + spacer.push_back(oligo); + } + else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } + } + m->gobble(inOligos); + } + inOligos.close(); + + if (hasPBarcodes || hasPPrimers) { + pairedOligos = true; + if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; } + } + + + //add in potential combos + if(barcodeNameVector.size() == 0){ + if (pairedOligos) { + oligosPair newPair("", ""); + pairedBarcodes[0] = newPair; + }else { + barcodes[""] = 0; + } + barcodeNameVector.push_back(""); + } + + if(primerNameVector.size() == 0){ + if (pairedOligos) { + oligosPair newPair("", ""); + pairedPrimers[0] = newPair; + }else { + primers[""] = 0; + } + primerNameVector.push_back(""); + } + + + if (pairedOligos) { + for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ + for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->first]; + string barcodeName = barcodeNameVector[itBar->first]; + + if (m->debug) { m->mothurOut("[DEBUG]: primerName = " + primerName + " barcodeName = " + barcodeName + "\n"); } + + if ((primerName == "ignore") || (barcodeName == "ignore")) { if (m->debug) { m->mothurOut("[DEBUG]: in ignore. \n"); } } //do nothing + else if ((primerName == "") && (barcodeName == "")) { if (m->debug) { m->mothurOut("[DEBUG]: in blank. \n"); } } //do nothing + else { + string comboGroupName = ""; + string fastqFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->first]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->first]; + } + else{ + comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; + } + } + + if (m->debug) { m->mothurOut("[DEBUG]: comboGroupName = " + comboGroupName + "\n"); } + + uniqueNames.insert(comboGroupName); + + map >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName); + if (itGroup2Barcode == Group2Barcode.end()) { + vector tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse); + Group2Barcode[comboGroupName] = tempBarcodes; + }else { + Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse); + } + + itGroup2Barcode = Group2Primer.find(comboGroupName); + if (itGroup2Barcode == Group2Primer.end()) { + vector tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse); + Group2Primer[comboGroupName] = tempPrimers; + }else { + Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse); + } + } + } + } + }else { + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->second]; + string barcodeName = barcodeNameVector[itBar->second]; + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastqFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + } + uniqueNames.insert(comboGroupName); + + map >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName); + if (itGroup2Barcode == Group2Barcode.end()) { + vector tempBarcodes; tempBarcodes.push_back(itBar->first); + Group2Barcode[comboGroupName] = tempBarcodes; + }else { + Group2Barcode[comboGroupName].push_back(itBar->first); + } + + itGroup2Barcode = Group2Primer.find(comboGroupName); + if (itGroup2Barcode == Group2Primer.end()) { + vector tempPrimers; tempPrimers.push_back(itPrimer->first); + Group2Primer[comboGroupName] = tempPrimers; + }else { + Group2Primer[comboGroupName].push_back(itPrimer->first); + } + } + } + } + } + + + if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } + + + Groups.clear(); + for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "readOligos"); + exit(1); + } +} +//********************************************************************/ +vector Oligos::getBarcodes(string groupName){ + try { + vector thisGroupsBarcodes; + + map >::iterator it = Group2Barcode.find(groupName); + + if (it == Group2Barcode.end()) { m->mothurOut("[ERROR]: no barcodes found for group " + groupName + ".\n"); m->control_pressed=true; + }else { thisGroupsBarcodes = it->second; } + + return thisGroupsBarcodes; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getBarcodes"); + exit(1); + } +} +//********************************************************************/ +vector Oligos::getPrimers(string groupName){ + try { + vector thisGroupsPrimers; + + map >::iterator it = Group2Primer.find(groupName); + + if (it == Group2Primer.end()) { m->mothurOut("[ERROR]: no primers found for group " + groupName + ".\n"); m->control_pressed=true; + }else { thisGroupsPrimers = it->second; } + + return thisGroupsPrimers; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getPrimers"); + exit(1); + } +} +//********************************************************************/ +//can't have paired and unpaired so this function will either run the paired map or the unpaired +map Oligos::getReorientedPairedPrimers(){ + try { + map rpairedPrimers; + + for (map::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { + string forward = (it->second).reverse; + if (reversePairs) { forward = reverseOligo(forward); } + string reverse = (it->second).forward; + if (reversePairs) { reverse = reverseOligo(reverse); } + oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer + rpairedPrimers[it->first] = tempPair; + } + + + for (map::iterator it = primers.begin(); it != primers.end(); it++) { + oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode + rpairedPrimers[it->second] = tempPair; + } + + return rpairedPrimers; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getReorientedPairedPrimers"); + exit(1); + } +} +//********************************************************************/ +//can't have paired and unpaired so this function will either run the paired map or the unpaired +map Oligos::getReorientedPairedBarcodes(){ + try { + map rpairedBarcodes; + + for (map::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { + string forward = (it->second).reverse; + if (reversePairs) { forward = reverseOligo(forward); } + string reverse = (it->second).forward; + if (reversePairs) { reverse = reverseOligo(reverse); } + oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer + rpairedBarcodes[it->first] = tempPair; + } + + for (map::iterator it = barcodes.begin(); it != barcodes.end(); it++) { + oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode + rpairedBarcodes[it->second] = tempPair; + } + + return rpairedBarcodes; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getReorientedPairedBarcodes"); + exit(1); + } +} + +//********************************************************************/ +string Oligos::reverseOligo(string oligo){ + try { + string reverse = ""; + + for(int i=oligo.length()-1;i>=0;i--){ + + if(oligo[i] == 'A') { reverse += 'T'; } + else if(oligo[i] == 'T'){ reverse += 'A'; } + else if(oligo[i] == 'U'){ reverse += 'A'; } + + else if(oligo[i] == 'G'){ reverse += 'C'; } + else if(oligo[i] == 'C'){ reverse += 'G'; } + + else if(oligo[i] == 'R'){ reverse += 'Y'; } + else if(oligo[i] == 'Y'){ reverse += 'R'; } + + else if(oligo[i] == 'M'){ reverse += 'K'; } + else if(oligo[i] == 'K'){ reverse += 'M'; } + + else if(oligo[i] == 'W'){ reverse += 'W'; } + else if(oligo[i] == 'S'){ reverse += 'S'; } + + else if(oligo[i] == 'B'){ reverse += 'V'; } + else if(oligo[i] == 'V'){ reverse += 'B'; } + + else if(oligo[i] == 'D'){ reverse += 'H'; } + else if(oligo[i] == 'H'){ reverse += 'D'; } + + else { reverse += 'N'; } + } + + + return reverse; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "reverseOligo"); + exit(1); + } +} +//********************************************************************/ +string Oligos::getBarcodeName(int index){ + try { + string name = ""; + + if ((index >= 0) && (index < barcodeNameVector.size())) { name = barcodeNameVector[index]; } + + return name; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getBarcodeName"); + exit(1); + } +} +//********************************************************************/ +string Oligos::getPrimerName(int index){ + try { + string name = ""; + + if ((index >= 0) && (index < primerNameVector.size())) { name = primerNameVector[index]; } + + return name; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getPrimerName"); + exit(1); + } +} +//********************************************************************/ +string Oligos::getGroupName(int barcodeIndex, int primerIndex){ + try { + + string thisGroup = ""; + if(numBarcodes != 0){ + thisGroup = getBarcodeName(barcodeIndex); + if (numFPrimers != 0) { + if (getPrimerName(primerIndex) != "") { + if(thisGroup != "") { + thisGroup += "." + getPrimerName(primerIndex); + }else { + thisGroup = getPrimerName(primerIndex); + } + } + } + } + + return thisGroup; + } + catch(exception& e) { + m->errorOut(e, "Oligos", "getGroupName"); + exit(1); + } +} + +/**************************************************************************************************/ + diff --git a/oligos.h b/oligos.h new file mode 100644 index 0000000..d5bc303 --- /dev/null +++ b/oligos.h @@ -0,0 +1,92 @@ +// +// oligos.h +// Mothur +// +// Created by Sarah Westcott on 4/4/14. +// Copyright (c) 2014 Schloss Lab. All rights reserved. +// + +#ifndef Mothur_oligos_h +#define Mothur_oligos_h + + +#include "mothurout.h" + + +/**************************************************************************************************/ + +class Oligos { + +public: + Oligos(string); + Oligos(); + ~Oligos() {} + + int read(string); + int read(string, bool); //read without reversing the paired barcodes, for make.contigs. + bool hasPairedPrimers() { return hasPPrimers; } + bool hasPairedBarcodes() { return hasPBarcodes; } + + //for processing with trimOligos class + map getPairedPrimers() { return pairedPrimers; } + map getPairedBarcodes() { return pairedBarcodes; } + map getPrimers() { return primers; } + map getBarcodes() { return barcodes; } + + map getReorientedPairedPrimers(); + map getReorientedPairedBarcodes(); + map getReorientedPrimers(); + map getReorientedBarcodes(); + + + vector getLinkers() { return linker; } + vector getSpacers() { return spacer; } + vector getReversePrimers() { return revPrimer; } + vector getPrimerNames() { return primerNameVector; } + vector getBarcodeNames() { return barcodeNameVector; } + vector getGroupNames() { return Groups; } + + + //for printing and other formatting uses + vector getBarcodes(string); //get barcodes for a group. For paired barcodes will return forward.reverse + vector getPrimers(string); //get primers for a group. For paired primers will return forward.reverse + string getBarcodeName(int); + string getPrimerName(int); + string getGroupName(int, int); + + +private: + + set uniqueNames; + vector Groups; + vector revPrimer; + map > Group2Barcode; + map > Group2Primer; + map pairedBarcodes; + map pairedPrimers; + map primers; + map barcodes; + vector linker; + vector spacer; + vector primerNameVector; + vector barcodeNameVector; + bool hasPPrimers, hasPBarcodes, pairedOligos, reversePairs; + string oligosfile; + int numBarcodes, numFPrimers; + MothurOut* m; + + int indexPrimer; + int indexBarcode; + int indexPairedPrimer; + int indexPairedBarcode; + set uniquePrimers; + set uniqueBarcodes; + + int readOligos(); + string reverseOligo(string); +}; + +/**************************************************************************************************/ + + +#endif diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp index 2f6f5bb..7202efd 100644 --- a/parsefastaqcommand.cpp +++ b/parsefastaqcommand.cpp @@ -16,6 +16,7 @@ vector ParseFastaQCommand::setParameters(){ CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq); CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos); CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); @@ -51,6 +52,7 @@ string ParseFastaQCommand::getHelpString(){ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n"; helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n"; helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n"; @@ -211,6 +213,9 @@ ParseFastaQCommand::ParseFastaQCommand(string option){ } if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; } + + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); } } @@ -235,14 +240,17 @@ int ParseFastaQCommand::execute(){ if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); } if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); } - TrimOligos* trimOligos = NULL; - int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0; + TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL; + pairedOligos = false; numBarcodes = 0; numPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0; if (oligosfile != "") { readOligos(oligosfile); - numPrimers = primers.size(); numBarcodes = barcodes.size(); //find group read belongs to - if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); } - else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); } + if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); numBarcodes = oligos.getPairedBarcodes().size(); numPrimers = oligos.getPairedPrimers().size(); } + else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size(); } + + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } } else if (groupfile != "") { readGroup(groupfile); } @@ -290,7 +298,7 @@ int ParseFastaQCommand::execute(){ if (split > 1) { int barcodeIndex, primerIndex, trashCodeLength; - if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers); } + if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, rtrimOligos, numBarcodes, numPrimers); } else if (groupfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode"); } else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); } @@ -322,7 +330,7 @@ int ParseFastaQCommand::execute(){ if (split > 1) { if (groupfile != "") { delete groupMap; } - else if (oligosfile != "") { delete trimOligos; } + else if (oligosfile != "") { delete trimOligos; if (reorient) { delete rtrimOligos; } } map::iterator it; set namesToRemove; @@ -460,7 +468,7 @@ vector ParseFastaQCommand::convertQual(string qual) { } } //********************************************************************************************************************** -int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) { +int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) { try { int success = 1; string trashCode = ""; @@ -469,7 +477,11 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned()); QualityScores currQual; currQual.setScores(convertQual(thisRead.quality)); - if(linker.size() != 0){ + //for reorient + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); + QualityScores savedQual(currQual.getName(), currQual.getScores()); + + if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq, currQual); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } @@ -482,7 +494,7 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer else{ currentSeqsDiffs += success; } } - if(spacer.size() != 0){ + if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq, currQual); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } @@ -497,27 +509,49 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } - if(revPrimer.size() != 0){ + if(numRPrimers != 0){ success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } - if (trashCode.length() == 0) { //is this sequence in the ignore group - string thisGroup = ""; + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; - if(barcodes.size() != 0){ - thisGroup = barcodeNameVector[barcode]; - if (numPrimers != 0) { - if (primerNameVector[primer] != "") { - if(thisGroup != "") { - thisGroup += "." + primerNameVector[primer]; - }else { - thisGroup = primerNameVector[primer]; - } - } - } + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true); + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } } + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcode = thisBarcodeIndex; + primer = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); + }else { trashCode += "(" + thisTrashCode + ")"; } + } + + if (trashCode.length() == 0) { //is this sequence in the ignore group + string thisGroup = oligos.getGroupName(barcode, primer); + int pos = thisGroup.find("ignore"); if (pos != string::npos) { trashCode += "i"; } } @@ -538,13 +572,7 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer string group = groupMap->getGroup(thisRead.seq.getName()); if (group == "not found") { trashCode += "g"; } //scrap for group - else { //find file group - map::iterator it = barcodes.find(group); - if (it != barcodes.end()) { - barcode = it->second; - }else { trashCode += "g"; } - } - + return trashCode.length(); } catch(exception& e) { @@ -556,276 +584,139 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer bool ParseFastaQCommand::readOligos(string oligoFile){ try { - ifstream inOligos; - m->openInputFile(oligoFile, inOligos); - - string type, oligo, roligo, group; - bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false; - - int indexPrimer = 0; - int indexBarcode = 0; - int indexPairedPrimer = 0; - int indexPairedBarcode = 0; - set uniquePrimers; - set uniqueBarcodes; - - while(!inOligos.eof()){ - - inOligos >> type; - - if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } - - if(type[0] == '#'){ - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - } - else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } - - for(int i=0;i::iterator itPrime = primers.find(oligo); - if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } - - primers[oligo]=indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - } - else if (type == "PRIMER"){ - m->gobble(inOligos); - - inOligos >> roligo; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } - - //check for repeat barcodes - string tempPair = oligo+roligo; - if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } - else { uniquePrimers.insert(tempPair); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } - - pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; - primerNameVector.push_back(group); - hasPrimer = true; - } - else if(type == "REVERSE"){ - //Sequence oligoRC("reverse", oligo); - //oligoRC.reverseComplement(); - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - } - else if(type == "BARCODE"){ - inOligos >> group; - - //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs - //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info - - string temp = ""; - while (!inOligos.eof()) { - char c = inOligos.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { temp += c; } - } - - //then this is illumina data with 4 columns - if (temp != "") { - hasPairedBarcodes = true; - string reverseBarcode = group; //reverseOligo(group); //reverse barcode - group = temp; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } - //check for repeat barcodes - string tempPair = oligo+reverseBarcode; - if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } - else { uniqueBarcodes.insert(tempPair); } - - pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; - barcodeNameVector.push_back(group); - }else { - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - } - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } - } - m->gobble(inOligos); - } - inOligos.close(); - - if (hasPairedBarcodes || hasPrimer) { + bool allBlank = false; + oligos.read(oligosfile); + + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { pairedOligos = true; - if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; } + numPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); } - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodes[""] = 0; - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primers[""] = 0; - primerNameVector.push_back(""); - } - - fastqFileNames.resize(barcodeNameVector.size()); + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + + vector groupNames = oligos.getGroupNames(); + if (groupNames.size() == 0) { allBlank = true; } + + + fastqFileNames.resize(oligos.getBarcodeNames().size()); for(int i=0;i uniqueNames; //used to cleanup outputFileNames - if (pairedOligos) { - for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ - for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->first]; - string barcodeName = barcodeNameVector[itBar->first]; + + set uniqueNames; //used to cleanup outputFileNames + if (pairedOligos) { + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; - if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing - else { - string comboGroupName = ""; - string fastqFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->first]; + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; } else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->first]; - } - else{ - comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; - } + comboGroupName = barcodeName + "." + primerName; } - - - ofstream temp; - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile)); - variables["[group]"] = comboGroupName; - fastqFileName = getOutputFileName("fastq", variables); - if (uniqueNames.count(fastqFileName) == 0) { - outputNames.push_back(fastqFileName); - outputTypes["fastq"].push_back(fastqFileName); - uniqueNames.insert(fastqFileName); - } - - fastqFileNames[itBar->first][itPrimer->first] = fastqFileName; - m->openOutputFile(fastqFileName, temp); temp.close(); - } + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile)); + variables["[group]"] = comboGroupName; + string fastqFileName = getOutputFileName("fastq", variables); + if (uniqueNames.count(fastqFileName) == 0) { + outputNames.push_back(fastqFileName); + outputTypes["fastq"].push_back(fastqFileName); + uniqueNames.insert(fastqFileName); + } + + fastqFileNames[itBar->first][itPrimer->first] = fastqFileName; + m->openOutputFile(fastqFileName, temp); temp.close(); } } - }else { - for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; + } + }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; - if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing - else { - string comboGroupName = ""; - string fastqFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; } else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } + comboGroupName = barcodeName + "." + primerName; } - - - ofstream temp; - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile)); - variables["[group]"] = comboGroupName; - fastqFileName = getOutputFileName("fastq", variables); - if (uniqueNames.count(fastqFileName) == 0) { - outputNames.push_back(fastqFileName); - outputTypes["fastq"].push_back(fastqFileName); - uniqueNames.insert(fastqFileName); - } - - fastqFileNames[itBar->second][itPrimer->second] = fastqFileName; - m->openOutputFile(fastqFileName, temp); temp.close(); - } + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile)); + variables["[group]"] = comboGroupName; + string fastqFileName = getOutputFileName("fastq", variables); + if (uniqueNames.count(fastqFileName) == 0) { + outputNames.push_back(fastqFileName); + outputTypes["fastq"].push_back(fastqFileName); + uniqueNames.insert(fastqFileName); + } + + fastqFileNames[itBar->second][itPrimer->second] = fastqFileName; + m->openOutputFile(fastqFileName, temp); temp.close(); } } } - + } + + if (allBlank) { + m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); + return false; + } + ofstream temp; map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile)); variables["[group]"] = "scrap"; noMatchFile = getOutputFileName("fastq", variables); m->openOutputFile(noMatchFile, temp); temp.close(); - + return true; } @@ -859,7 +750,6 @@ bool ParseFastaQCommand::readGroup(string groupfile){ ofstream temp; m->openOutputFileBinary(thisFilename, temp); temp.close(); fastqFileNames[i].push_back(thisFilename); - barcodes[groups[i]] = i; } } @@ -877,48 +767,6 @@ bool ParseFastaQCommand::readGroup(string groupfile){ exit(1); } } -//********************************************************************/ -string ParseFastaQCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } - - else { reverse += 'N'; } - } - - - return reverse; - } - catch(exception& e) { - m->errorOut(e, "ParseFastaQCommand", "reverseOligo"); - exit(1); - } -} - - //********************************************************************************************************************** diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h index 2ac1416..60c80f4 100644 --- a/parsefastaqcommand.h +++ b/parsefastaqcommand.h @@ -15,6 +15,7 @@ #include "trimoligos.h" #include "sequence.hpp" #include "groupmap.h" +#include "oligos.h" struct fastqRead2 { string quality; @@ -50,16 +51,11 @@ private: vector outputNames; string outputDir, fastaQFile, format, oligosfile, groupfile; - bool abort, fasta, qual, pacbio, pairedOligos; - int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split; + bool abort, fasta, qual, pacbio, pairedOligos, reorient; + int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split, numBarcodes, numPrimers, numLinkers, numSpacers, numRPrimers; GroupMap* groupMap; + Oligos oligos; - //oligos file data structures - vector linker, spacer, primerNameVector, barcodeNameVector, revPrimer; - map barcodes; - map primers; - map pairedBarcodes; - map pairedPrimers; vector > fastqFileNames; string noMatchFile; @@ -67,9 +63,8 @@ private: vector convertTable; bool readOligos(string oligosFile); bool readGroup(string oligosFile); - string reverseOligo(string oligo); fastqRead2 readFastq(ifstream&, bool&); - int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int); + int findGroup(fastqRead2, int&, int&, TrimOligos*&, TrimOligos*&, int, int); int findGroup(fastqRead2, int&, int&, string); }; diff --git a/pcrseqscommand.h b/pcrseqscommand.h index ba90624..ad69961 100644 --- a/pcrseqscommand.h +++ b/pcrseqscommand.h @@ -16,6 +16,7 @@ #include "alignment.hpp" #include "needlemanoverlap.hpp" #include "counttable.h" +#include "oligos.h" class PcrSeqsCommand : public Command { public: @@ -45,25 +46,23 @@ private: }; vector lines; - bool getOligos(vector >&, vector >&, vector >&); - bool abort, keepprimer, keepdots, fileAligned; + bool abort, keepprimer, keepdots, fileAligned, pairedOligos; string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch; - int start, end, processors, length, pdiffs; + int start, end, processors, length, pdiffs, numFPrimers, numRPrimers; + Oligos oligos; - vector revPrimer, outputNames; - map primers; + vector outputNames; int writeAccnos(set); int readName(set&); int readGroup(set); int readTax(set); int readCount(set); - bool readOligos(); + int readOligos(); bool readEcoli(); int driverPcr(string, string, string, string, set&, linePair, int&, bool&); int createProcesses(string, string, string, set&); bool isAligned(string, map&); - string reverseOligo(string); int adjustDots(string, string, int, int); }; @@ -79,22 +78,17 @@ struct pcrData { unsigned long long fend; int count, start, end, length, pdiffs, pstart, pend; MothurOut* m; - map primers; - vector revPrimer; set badSeqNames; bool keepprimer, keepdots, fileAligned, adjustNeeded; - pcrData(){} - pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map pr, vector rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) { + pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) { filename = f; goodFasta = gf; badFasta = bfn; m = mout; oligosfile = ol; ecolifile = ec; - primers = pr; - revPrimer = rpr; nomatch = nm; keepprimer = kp; keepdots = kd; @@ -144,7 +138,26 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ map faked; set locations; //locations = beginning locations - TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer); + Oligos oligos(pDataArray->oligosfile); + int numFPrimers, numRPrimers; + map primers; + map barcodes; //not used + vector revPrimer; + if (oligos.hasPairedBarcodes()) { + numFPrimers = oligos.getPairedPrimers().size(); + map primerPairs = oligos.getPairedPrimers(); + for (map::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) { + primers[(it->second).forward] = it->first; + revPrimer.push_back((it->second).reverse); + } + }else { + numFPrimers = oligos.getPrimers().size(); + primers = oligos.getPrimers(); + revPrimer = oligos.getReversePrimers(); + } + numRPrimers = oligos.getReversePrimers().size(); + + TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer); for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process pDataArray->count++; @@ -179,7 +192,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ /////////////////////////////////////////////////////////////// //process primers - if (pDataArray->primers.size() != 0) { + if (numFPrimers != 0) { int primerStart = 0; int primerEnd = 0; bool good = trim.findForward(currSeq, primerStart, primerEnd); @@ -224,7 +237,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } //process reverse primers - if (pDataArray->revPrimer.size() != 0) { + if (numRPrimers != 0) { int primerStart = 0; int primerEnd = 0; bool good = trim.findReverse(currSeq, primerStart, primerEnd); @@ -327,7 +340,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value if (locations.size() > 1) { pDataArray->adjustNeeded = true; } - if (pDataArray->primers.size() != 0) { set::iterator it = locations.begin(); pDataArray->pstart = *it; } + if (numFPrimers != 0) { set::iterator it = locations.begin(); pDataArray->pstart = *it; } } return 0; diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index ee15bed..722aca5 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -312,7 +312,7 @@ int PcrSeqsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); - fileAligned = true; + fileAligned = true; pairedOligos = false; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } @@ -326,7 +326,7 @@ int PcrSeqsCommand::execute(){ length = 0; - if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } } if (m->control_pressed) { return 0; } + if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(numFPrimers) + ", revprimers = " + toString(numRPrimers) + ".\n"); } } if (m->control_pressed) { return 0; } if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; } vector positions; @@ -529,7 +529,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); } // Allocate memory for thread data. - pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end); + pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end); pDataArray.push_back(tempPcr); //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier @@ -586,8 +586,8 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string string name = ""; int thisStart = -1; int thisEnd = -1; - if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); } - if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); } + if (numFPrimers != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); } + if (numRPrimers != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); } else { pend = -1; break; } int myDiff = 0; @@ -642,8 +642,22 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta set locations; //locations[0] = beginning locations, //pdiffs, bdiffs, primers, barcodes, revPrimers - map faked; - TrimOligos trim(pdiffs, 0, primers, faked, revPrimer); + map primers; + map barcodes; //not used + vector revPrimer; + if (pairedOligos) { + map primerPairs = oligos.getPairedPrimers(); + for (map::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) { + primers[(it->second).forward] = it->first; + revPrimer.push_back((it->second).reverse); + } + if (pdiffs != 0) { m->mothurOut("[WARNING]: Pcr.seqs is only designed to allow diffs in forward primers. Reverse primers must be an exact match.\n"); } + }else{ + primers = oligos.getPrimers(); + revPrimer = oligos.getReversePrimers(); + } + + TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer); while (!done) { @@ -871,8 +885,8 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i string name = ""; int thisStart = -1; int thisEnd = -1; - if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); } - if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); } + if (numFPrimers != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); } + if (numRPrimers != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); } //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl; @@ -926,130 +940,6 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i exit(1); } } -//********************************************************************/ -string PcrSeqsCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } - - else { reverse += 'N'; } - } - - - return reverse; - } - catch(exception& e) { - m->errorOut(e, "PcrSeqsCommand", "reverseOligo"); - exit(1); - } -} - -//*************************************************************************************************************** -bool PcrSeqsCommand::readOligos(){ - try { - ifstream inOligos; - m->openInputFile(oligosfile, inOligos); - - string type, oligo, group; - int primerCount = 0; - - while(!inOligos.eof()){ - - inOligos >> type; - - if(type[0] == '#'){ //ignore - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - }else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - for(int i=0;i> group; - }else if(type == "PRIMER"){ - m->gobble(inOligos); - primers[oligo] = primerCount; primerCount++; - - string roligo=""; - inOligos >> roligo; - - for(int i=0;imothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; } - } - m->gobble(inOligos); - } - inOligos.close(); - - if ((primers.size() == 0) && (revPrimer.size() == 0)) { - m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers. Please correct."); m->mothurOutEndLine(); - m->control_pressed = true; - return false; - } - - return true; - - }catch(exception& e) { - m->errorOut(e, "PcrSeqsCommand", "readOligos"); - exit(1); - } -} //*************************************************************************************************************** bool PcrSeqsCommand::readEcoli(){ try { @@ -1321,6 +1211,35 @@ int PcrSeqsCommand::readCount(set badSeqNames){ exit(1); } } +//*************************************************************************************************************** + +int PcrSeqsCommand::readOligos(){ + try { + oligos.read(oligosfile); + + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + } + numRPrimers = oligos.getReversePrimers().size(); + + if (oligos.getLinkers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove linkers, ignoring.\n"); } + if (oligos.getSpacers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove spacers, ignoring.\n"); } + + return true; + + } + catch(exception& e) { + m->errorOut(e, "PcrSeqsCommand", "readOligos"); + exit(1); + } +} + /**************************************************************************************/ diff --git a/qualityscores.cpp b/qualityscores.cpp index 3a1687b..916dab3 100644 --- a/qualityscores.cpp +++ b/qualityscores.cpp @@ -23,7 +23,19 @@ QualityScores::QualityScores(){ exit(1); } } +/**************************************************************************************************/ +QualityScores::QualityScores(string n, vector s){ + try { + m = MothurOut::getInstance(); + setName(n); + setScores(s); + } + catch(exception& e) { + m->errorOut(e, "QualityScores", "QualityScores"); + exit(1); + } +} /**************************************************************************************************/ QualityScores::QualityScores(ifstream& qFile){ @@ -344,10 +356,9 @@ double QualityScores::calculateAverage(bool logTransform){ if (logTransform) { aveQScore += pow(10.0, qScores[i]); } else { aveQScore += qScores[i]; } } - aveQScore /= (double) seqLength; - if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); } - else { aveQScore /= (double) seqLength; } + if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); } + else { aveQScore /= (double) seqLength; } return aveQScore; } @@ -366,6 +377,8 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool lo double aveQScore = calculateAverage(logTransform); + if (m->debug) { m->mothurOut("[DEBUG]: " + sequence.getName() + " average = " + toString(aveQScore) + "\n"); } + if(aveQScore >= qAverage) { success = 1; } else { success = 0; } diff --git a/qualityscores.h b/qualityscores.h index a802636..756b36f 100644 --- a/qualityscores.h +++ b/qualityscores.h @@ -22,6 +22,7 @@ class QualityScores { public: QualityScores(); + QualityScores(string n, vector qs); QualityScores(ifstream&); string getName(); int getLength(){ return (int)qScores.size(); } diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 26d4832..dc8eb54 100755 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -18,6 +18,7 @@ vector SffInfoCommand::setParameters(){ try { CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff); CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt); @@ -47,7 +48,7 @@ string SffInfoCommand::getHelpString(){ try { string helpString = ""; helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n"; - helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n"; + helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n"; helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"; helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"; helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"; @@ -58,6 +59,7 @@ string SffInfoCommand::getHelpString(){ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"; helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"; helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"; @@ -492,6 +494,8 @@ SffInfoCommand::SffInfoCommand(string option) { else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; } } + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); } } @@ -563,13 +567,22 @@ int SffInfoCommand::execute(){ //********************************************************************************************************************** int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ try { + oligosObject = new Oligos(); currentFileName = input; if (outputDir == "") { outputDir += m->hasPath(input); } if (accnos != "") { readAccnosFile(accnos); } else { seqNames.clear(); } - if (hasOligos) { readOligos(oligos); split = 2; } + TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL; + if (hasOligos) { + readOligos(oligos); split = 2; + if (m->control_pressed) { delete oligosObject; return 0; } + trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers()); numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size(); + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size(); + } + } if (hasGroup) { readGroup(oligos); split = 2; } ofstream outSfftxt, outFasta, outQual, outFlow; @@ -599,8 +612,8 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ int count = 0; //check magic number and version - if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } - if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } + if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; } + if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; } //print common header if (sfftxt) { printCommonHeader(outSfftxt, header); } @@ -613,7 +626,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ //read data seqRead read; Header readheader; - readSeqData(in, read, header.numFlowsPerRead, readheader); + readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos); bool okay = sanityCheck(readheader, read); if (!okay) { break; } @@ -700,6 +713,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); } } + delete oligosObject; + if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } + return count; } catch(exception& e) { @@ -1036,7 +1052,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ } } //********************************************************************************************************************** -bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){ +bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){ try { unsigned long long startSpotInFile = in.tellg(); if (!in.eof()) { @@ -1144,7 +1160,7 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int barcodeIndex, primerIndex, trashCodeLength; - if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex); } + if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos); } else if (hasGroup) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode"); } else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); } @@ -1189,10 +1205,8 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, } } //********************************************************************************************************************** -int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) { +int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) { try { - //find group read belongs to - TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); int success = 1; string trashCode = ""; @@ -1229,55 +1243,81 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr Sequence currSeq(header.name, seq); QualityScores currQual; + //for reorient + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); + QualityScores savedQual(currQual.getName(), currQual.getScores()); + if(numLinkers != 0){ - success = trimOligos.stripLinker(currSeq, currQual); + success = trimOligos->stripLinker(currSeq, currQual); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } - if(barcodes.size() != 0){ - success = trimOligos.stripBarcode(currSeq, currQual, barcode); + if(numBarcodes != 0){ + success = trimOligos->stripBarcode(currSeq, currQual, barcode); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numSpacers != 0){ - success = trimOligos.stripSpacer(currSeq, currQual); + success = trimOligos->stripSpacer(currSeq, currQual); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } if(numFPrimers != 0){ - success = trimOligos.stripForward(currSeq, currQual, primer, true); + success = trimOligos->stripForward(currSeq, currQual, primer, true); if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } - if(revPrimer.size() != 0){ - success = trimOligos.stripReverse(currSeq, currQual); + if(numRPrimers != 0){ + success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } - if (trashCode.length() == 0) { //is this sequence in the ignore group - string thisGroup = ""; + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; - if(barcodes.size() != 0){ - thisGroup = barcodeNameVector[barcode]; - if (numFPrimers != 0) { - if (primerNameVector[primer] != "") { - if(thisGroup != "") { - thisGroup += "." + primerNameVector[primer]; - }else { - thisGroup = primerNameVector[primer]; - } - } - } + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true); + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } } + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcode = thisBarcodeIndex; + primer = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); + }else { trashCode += "(" + thisTrashCode + ")"; } + } + + if (trashCode.length() == 0) { //is this sequence in the ignore group + string thisGroup = oligosObject->getGroupName(barcode, primer); + int pos = thisGroup.find("ignore"); if (pos != string::npos) { trashCode += "i"; } } @@ -1297,12 +1337,6 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr string group = groupMap->getGroup(header.name); if (group == "not found") { trashCode += "g"; } //scrap for group - else { //find file group - map::iterator it = barcodes.find(group); - if (it != barcodes.end()) { - barcode = it->second; - }else { trashCode += "g"; } - } return trashCode.length(); } @@ -1833,121 +1867,59 @@ bool SffInfoCommand::readOligos(string oligoFile){ numSplitReads.clear(); filehandlesHeaders.clear(); - ifstream inOligos; - m->openInputFile(oligoFile, inOligos); - - string type, oligo, group; + bool allBlank = false; + oligosObject->read(oligoFile); - int indexPrimer = 0; - int indexBarcode = 0; - - while(!inOligos.eof()){ - - inOligos >> type; - - if(type[0] == '#'){ - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - } - else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - for(int i=0;i::iterator itPrime = primers.find(oligo); - if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - primers[oligo]=indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - - }else if(type == "REVERSE"){ - //Sequence oligoRC("reverse", oligo); - //oligoRC.reverseComplement(); - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - } - else if(type == "BARCODE"){ - inOligos >> group; - - - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } - } - m->gobble(inOligos); - } - inOligos.close(); - - if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; } - - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodes[""] = 0; - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primers[""] = 0; - primerNameVector.push_back(""); - } - - filehandles.resize(barcodeNameVector.size()); + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligosObject->hasPairedBarcodes()) { + pairedOligos = true; + m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true; + }else { + pairedOligos = false; + numFPrimers = oligosObject->getPrimers().size(); + numBarcodes = oligosObject->getBarcodes().size(); + } + + numLinkers = oligosObject->getLinkers().size(); + numSpacers = oligosObject->getSpacers().size(); + numRPrimers = oligosObject->getReversePrimers().size(); + + vector groupNames = oligosObject->getGroupNames(); + if (groupNames.size() == 0) { allBlank = true; } + + filehandles.resize(oligosObject->getBarcodeNames().size()); for(int i=0;igetPrimerNames().size();j++){ filehandles[i].push_back(""); } } - if(split > 1){ - set uniqueNames; //used to cleanup outputFileNames - for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; - + if(split > 1){ + set uniqueNames; //used to cleanup outputFileNames + map barcodes = oligosObject->getBarcodes() ; + map primers = oligosObject->getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligosObject->getPrimerName(itPrimer->second); + string barcodeName = oligosObject->getBarcodeName(itBar->second); + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; string fastaFileName = ""; string qualFileName = ""; string nameFileName = ""; + string countFileName = ""; if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ + comboGroupName = barcodeName; + }else{ if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; + comboGroupName = primerName; } else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + comboGroupName = barcodeName + "." + primerName; } } @@ -1965,33 +1937,16 @@ bool SffInfoCommand::readOligos(string oligoFile){ filehandles[itBar->second][itPrimer->second] = thisFilename; temp.open(thisFilename.c_str(), ios::binary); temp.close(); } - } - } - } - numFPrimers = primers.size(); - numLinkers = linker.size(); - numSpacers = spacer.size(); + } + } + } + map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); variables["[group]"] = "scrap"; noMatchFile = getOutputFileName("sff",variables); m->mothurRemove(noMatchFile); numNoMatch = 0; - - - bool allBlank = true; - for (int i = 0; i < barcodeNameVector.size(); i++) { - if (barcodeNameVector[i] != "") { - allBlank = false; - break; - } - } - for (int i = 0; i < primerNameVector.size(); i++) { - if (primerNameVector[i] != "") { - allBlank = false; - break; - } - } filehandlesHeaders.resize(filehandles.size()); numSplitReads.resize(filehandles.size()); @@ -2023,7 +1978,6 @@ bool SffInfoCommand::readGroup(string oligoFile){ filehandles.clear(); numSplitReads.clear(); filehandlesHeaders.clear(); - barcodes.clear(); groupMap = new GroupMap(); groupMap->readMap(oligoFile); @@ -2045,7 +1999,6 @@ bool SffInfoCommand::readGroup(string oligoFile){ ofstream temp; m->openOutputFileBinary(thisFilename, temp); temp.close(); filehandles[i].push_back(thisFilename); - barcodes[groups[i]] = i; } } diff --git a/sffinfocommand.h b/sffinfocommand.h index 12f0180..b304e48 100644 --- a/sffinfocommand.h +++ b/sffinfocommand.h @@ -12,6 +12,8 @@ #include "command.hpp" #include "groupmap.h" +#include "oligos.h" +#include "trimoligos.h" /**********************************************************/ @@ -37,22 +39,20 @@ public: private: string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile; vector filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames; - bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup; - int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch; + bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup, reorient, pairedOligos; + int mycount, split, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch; set seqNames; - map barcodes; - map primers; GroupMap* groupMap; - vector linker, spacer, primerNameVector, barcodeNameVector, revPrimer; vector > numSplitReads; vector > filehandles; vector > filehandlesHeaders; + Oligos* oligosObject; //extract sff file functions int extractSffInfo(string, string, string); int readCommonHeader(ifstream&, CommonHeader&); int readHeader(ifstream&, Header&); - bool readSeqData(ifstream&, seqRead&, int, Header&); + bool readSeqData(ifstream&, seqRead&, int, Header&, TrimOligos*&, TrimOligos*&); int decodeName(string&, string&, string&, string); bool readOligos(string oligosFile); bool readGroup(string oligosFile); @@ -67,7 +67,7 @@ private: int parseSffTxt(); bool sanityCheck(Header&, seqRead&); int adjustCommonHeader(CommonHeader); - int findGroup(Header header, seqRead read, int& barcode, int& primer); + int findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*&, TrimOligos*&); int findGroup(Header header, seqRead read, int& barcode, int& primer, string); string reverseOligo(string oligo); diff --git a/sracommand.cpp b/sracommand.cpp index cde7955..d840dc8 100644 --- a/sracommand.cpp +++ b/sracommand.cpp @@ -18,6 +18,7 @@ vector SRACommand::setParameters(){ CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile); CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq); CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark); //choose only one multiple options CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform); @@ -51,7 +52,7 @@ string SRACommand::getHelpString(){ try { string helpString = ""; helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n"; - helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n"; + helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n"; helpString += "The sff parameter is used to provide the original sff file.\n"; helpString += "The fastq parameter is used to provide the original fastq file.\n"; helpString += "The project parameter is used to provide your project file.\n"; @@ -63,6 +64,7 @@ string SRACommand::getHelpString(){ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n"; helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n"; helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n"; @@ -134,7 +136,7 @@ SRACommand::SRACommand(string option) { outputTypes["xml"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { @@ -284,6 +286,8 @@ SRACommand::SRACommand(string option) { m->mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } + + checkorient = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } } @@ -816,12 +820,21 @@ int SRACommand::readFile(map >& files){ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); } + if (inputDir != "") { + string path = m->hasPath(thisFileName1); + if (path == "") { thisFileName1 = inputDir + thisFileName1; } + + path = m->hasPath(thisFileName2); + if (path == "") { thisFileName2 = inputDir + thisFileName2; } + } + //check to make sure both are able to be opened ifstream in2; int openForward = m->openInputFile(thisFileName1, in2, "noerror"); //if you can't open it, try default location if (openForward == 1) { + if (m->getDefaultPath() != "") { //default path is set string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1); m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine(); @@ -941,6 +954,7 @@ int SRACommand::parseSffFile(map >& files){ if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } + if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } m->mothurOutEndLine(); m->mothurOut("/******************************************/"); m->mothurOutEndLine(); @@ -986,6 +1000,7 @@ int SRACommand::parseFastqFile(map >& files){ if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } + if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } m->mothurOutEndLine(); m->mothurOut("/******************************************/"); m->mothurOutEndLine(); @@ -1072,201 +1087,40 @@ int SRACommand::checkGroups(map >& files){ //*************************************************************************************************************** int SRACommand::readOligos(){ try { - ifstream inOligos; - m->openInputFile(oligosfile, inOligos); - - string type, oligo, roligo, group; - bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false; - map pairedBarcodes; - map pairedPrimers; - map barcodes; - map primers; - vector linker; - vector spacer, revPrimer; - int indexPrimer = 0; - int indexBarcode = 0; - int indexPairedPrimer = 0; - int indexPairedBarcode = 0; - set uniquePrimers; - set uniqueBarcodes; - - while(!inOligos.eof()){ - - inOligos >> type; - - if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } - - if(type[0] == '#'){ - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - } - else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } - - for(int i=0;i::iterator itPrime = primers.find(oligo); - if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } - - primers[oligo] = indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - } - else if (type == "PRIMER"){ - m->gobble(inOligos); - - inOligos >> roligo; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } - - //check for repeat barcodes - string tempPair = oligo+roligo; - if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } - else { uniquePrimers.insert(tempPair); } - - if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } - - pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; - primerNameVector.push_back(group); - hasPrimer = true; - } - else if(type == "REVERSE"){ - //Sequence oligoRC("reverse", oligo); - //oligoRC.reverseComplement(); - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - } - else if(type == "BARCODE"){ - inOligos >> group; - - //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs - //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info - - string temp = ""; - while (!inOligos.eof()) { - char c = inOligos.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { temp += c; } - } - - //then this is illumina data with 4 columns - if (temp != "") { - hasPairedBarcodes = true; - string reverseBarcode = group; //reverseOligo(group); //reverse barcode - group = temp; - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } - //check for repeat barcodes - string tempPair = oligo+reverseBarcode; - if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } - else { uniqueBarcodes.insert(tempPair); } - - pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; - barcodeNameVector.push_back(group); - }else { - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - } - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } - } - m->gobble(inOligos); - } - inOligos.close(); - - if (hasPairedBarcodes || hasPrimer) { - pairedOligos = true; - if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; } - } - + Oligos oligos(oligosfile); - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primerNameVector.push_back(""); - } + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { pairedOligos = true; } + else { pairedOligos = false; } + set uniqueNames; //used to cleanup outputFileNames if (pairedOligos) { - for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ - for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - string primerName = primerNameVector[itPrimer->first]; - string barcodeName = barcodeNameVector[itBar->first]; + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; - string fastqFileName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->first]; - } - else{ + comboGroupName = barcodeName; + }else{ if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->first]; + comboGroupName = primerName; } else{ - comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; + comboGroupName = barcodeName + "." + primerName; } } uniqueNames.insert(comboGroupName); @@ -1290,26 +1144,31 @@ int SRACommand::readOligos(){ } } }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; - string fastqFileName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ + comboGroupName = barcodeName; + }else{ if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; + comboGroupName = primerName; } else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + comboGroupName = barcodeName + "." + primerName; } } uniqueNames.insert(comboGroupName); @@ -1333,10 +1192,8 @@ int SRACommand::readOligos(){ } } } - - - if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } + if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } return true; diff --git a/sracommand.h b/sracommand.h index 6c30bd9..dde6561 100644 --- a/sracommand.h +++ b/sracommand.h @@ -11,6 +11,7 @@ #include "command.hpp" #include "trimoligos.h" +#include "oligos.h" /**************************************************************************************************/ @@ -37,12 +38,10 @@ private: bool abort, isSFF, pairedOligos; int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs; string sfffile, fastqfile, outputDir, file, oligosfile, contactfile, inputfile, mimarksfile; - string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType; + string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType, checkorient; string submissionName, lastName, firstName, email, centerName, centerType, description, website, orientation, packageType; - string projectName, grantId, grantTitle, grantAgency, projectTitle; + string projectName, grantId, grantTitle, grantAgency, projectTitle, inputDir; vector outputNames, Groups; - vector primerNameVector; - vector barcodeNameVector; map > Group2Barcode; map > Group2Primer; map Group2Organism; diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 5800cde..ec724de 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -16,6 +16,7 @@ vector TrimFlowsCommand::setParameters(){ try { CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); @@ -47,6 +48,14 @@ string TrimFlowsCommand::getHelpString(){ try { string helpString = ""; helpString += "The trim.flows command reads a flowgram file and creates .....\n"; + helpString += "The oligos parameter allows you to provide an oligos file.\n"; + helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; + helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; + helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; + helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; + helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; + helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n"; @@ -231,6 +240,9 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { if(oligoFileName == "") { allFiles = 0; } else { allFiles = 1; } + + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); numFPrimers = 0; numRPrimers = 0; @@ -416,7 +428,15 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int count = 0; bool moreSeqs = 1; - TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); + TrimOligos* trimOligos = NULL; + if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } + + TrimOligos* rtrimOligos = NULL; + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } + while(moreSeqs) { @@ -430,7 +450,9 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); - //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; + //for reorient + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); + if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; @@ -444,7 +466,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int barcodeIndex = 0; if(numLinkers != 0){ - success = trimOligos.stripLinker(currSeq); + success = trimOligos->stripLinker(currSeq); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqDiffs += success; } @@ -452,21 +474,21 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); } - if(barcodes.size() != 0){ - success = trimOligos.stripBarcode(currSeq, barcodeIndex); + if(numBarcodes != 0){ + success = trimOligos->stripBarcode(currSeq, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqDiffs += success; } } if(numSpacers != 0){ - success = trimOligos.stripSpacer(currSeq); + success = trimOligos->stripSpacer(currSeq); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqDiffs += success; } } if(numFPrimers != 0){ - success = trimOligos.stripForward(currSeq, primerIndex); + success = trimOligos->stripForward(currSeq, primerIndex); if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqDiffs += success; } } @@ -474,24 +496,45 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if (currentSeqDiffs > tdiffs) { trashCode += 't'; } if(numRPrimers != 0){ - success = trimOligos.stripReverse(currSeq); + success = trimOligos->stripReverse(currSeq); if(!success) { trashCode += 'r'; } } - - if(trashCode.length() == 0){ - string thisGroup = ""; - if(barcodes.size() != 0){ - thisGroup = barcodeNameVector[barcodeIndex]; - if (primers.size() != 0) { - if (primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + primerNameVector[primerIndex]; - }else { - thisGroup = primerNameVector[primerIndex]; - } - } - } + + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, thisBarcodeIndex); + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } } + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, thisPrimerIndex); + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + }else { trashCode += "(" + thisTrashCode + ")"; } + } + + if(trashCode.length() == 0){ + string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); int pos = thisGroup.find("ignore"); if (pos == string::npos) { @@ -534,6 +577,8 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN scrapFlowFile.close(); flowFile.close(); if(fasta){ fastaFile.close(); } + delete trimOligos; + if (reorient) { delete rtrimOligos; } return count; } @@ -545,199 +590,131 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN //*************************************************************************************************************** -void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ +int TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ try { - ifstream oligosFile; - m->openInputFile(oligoFileName, oligosFile); - - string type, oligo, group; - - int indexPrimer = 0; - int indexBarcode = 0; - - while(!oligosFile.eof()){ - - oligosFile >> type; //get the first column value of the row - is it a comment or a feature we are interested in? - - if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); } - - if(type[0] == '#'){ //igore the line because there's a comment - while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } } - m->gobble(oligosFile);// get rest of line if there's any crap there - } - else{ //there's a feature we're interested in - m->gobble(oligosFile); - for(int i=0;i> oligo; //get the DNA sequence for the feature - - for(int i=0;idebug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); } - - if(type == "FORWARD"){ //if the feature is a forward primer... - group = ""; - - while (!oligosFile.eof()) { // get rest of line in case there is a primer name = will have the name of the primer - char c = oligosFile.get(); - if (c == 10 || c == 13 || c == -1){ break; } - else if (c == 32 || c == 9){;} //space or tab - else { group += c; } - } - - //have we seen this primer already? - map::iterator itPrimer = primers.find(oligo); - if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - primers[oligo]=indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - - } - else if(type == "REVERSE"){ - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); } - } - else if(type == "BARCODE"){ - oligosFile >> group; - - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ - m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); - } - } - - m->gobble(oligosFile); - } - oligosFile.close(); - - if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; } - - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodes[""] = 0; - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primers[""] = 0; - primerNameVector.push_back(""); - } - - outFlowFileNames.resize(barcodeNameVector.size()); + bool allBlank = false; + oligos.read(oligoFileName); + + if (m->control_pressed) { return 0; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + + vector groupNames = oligos.getGroupNames(); + if (groupNames.size() == 0) { allFiles = 0; allBlank = true; } + + + outFlowFileNames.resize(oligos.getBarcodeNames().size()); for(int i=0;i::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; - - if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing - else { - string comboGroupName = ""; - string fileName = ""; + if (allFiles) { + set uniqueNames; //used to cleanup outputFileNames + if (pairedOligos) { + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - variables["[tag]"] = comboGroupName; - fileName = getOutputFileName("flow", variables); - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); variables["[tag]"] = comboGroupName; - fileName = getOutputFileName("flow", variables); + string fileName = getOutputFileName("flow", variables); + if (uniqueNames.count(fileName) == 0) { + outputNames.push_back(fileName); + outputTypes["flow"].push_back(fileName); + uniqueNames.insert(fileName); + } + + outFlowFileNames[itBar->first][itPrimer->first] = fileName; + m->openOutputFile(fileName, temp); temp.close(); } + } + } + }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - outFlowFileNames[itBar->second][itPrimer->second] = fileName; + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); - ofstream temp; - m->openOutputFile(fileName, temp); - temp.close(); + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + variables["[tag]"] = comboGroupName; + string fileName = getOutputFileName("flow", variables); + if (uniqueNames.count(fileName) == 0) { + outputNames.push_back(fileName); + outputTypes["flow"].push_back(fileName); + uniqueNames.insert(fileName); + } + + outFlowFileNames[itBar->second][itPrimer->second] = fileName; + m->openOutputFile(fileName, temp); temp.close(); + } } - } - } - } - - numFPrimers = primers.size(); - numRPrimers = revPrimer.size(); - numLinkers = linker.size(); - numSpacers = spacer.size(); - - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "getOligos"); - exit(1); - } -} -//********************************************************************/ -string TrimFlowsCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } + } + } - else { reverse += 'N'; } } - - - return reverse; - } + return 0; + } catch(exception& e) { - m->errorOut(e, "TrimFlowsCommand", "reverseOligo"); + m->errorOut(e, "TrimFlowsCommand", "getOligos"); exit(1); } } - /**************************************************************************************************/ vector TrimFlowsCommand::getFlowFileBreaks() { @@ -884,7 +861,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim //Windows version shared memory, so be careful when passing variables through the trimFlowData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, ////////////////////////////////////////////////////////////////////////////////////////////////////// - + /* vector pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; @@ -958,7 +935,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim CloseHandle(hThreadArray[i]); delete pDataArray[i]; } - + */ #endif //append files diff --git a/trimflowscommand.h b/trimflowscommand.h index f5493eb..58d3ed0 100644 --- a/trimflowscommand.h +++ b/trimflowscommand.h @@ -16,6 +16,7 @@ #include "flowdata.h" #include "groupmap.h" #include "trimoligos.h" +#include "oligos.h" class TrimFlowsCommand : public Command { public: @@ -47,43 +48,29 @@ private: int comboStarts; vector processIDS; //processid vector lines; - - vector getFlowFileBreaks(); - int createProcessesCreateTrim(string, string, string, string, vector >); - int driverCreateTrim(string, string, string, string, vector >, linePair*); - string reverseOligo(string); - - vector outputNames; + vector outputNames; set filesToRemove; - - void getOligos(vector >&); //a rewrite of what is in trimseqscommand.h - - bool allFiles; + bool allFiles; int processors; - int numFPrimers, numRPrimers; + int numFPrimers, numRPrimers, numBarcodes; int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers; int numFlows; float signal, noise; - bool fasta; - string flowOrder; - - string flowFileName, oligoFileName, outputDir; + bool fasta, pairedOligos, reorient; + string flowOrder, flowFileName, oligoFileName, outputDir; + Oligos oligos; - map barcodes; - map primers; - vector revPrimer; - vector linker; - vector spacer; - - vector primerNameVector; //needed here? - vector barcodeNameVector; //needed here? - map combos; //needed here? - map groupToIndex; //needed here? + vector getFlowFileBreaks(); + int createProcessesCreateTrim(string, string, string, string, vector >); + int driverCreateTrim(string, string, string, string, vector >, linePair*); + int getOligos(vector >&); //a rewrite of what is in trimseqscommand.h + + }; -/**************************************************************************************************/ +/************************************************************************************************** //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). @@ -133,7 +120,7 @@ struct trimFlowData { } }; -/**************************************************************************************************/ +/************************************************************************************************** #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ @@ -260,6 +247,6 @@ static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ } } #endif - +*/ #endif diff --git a/trimoligos.cpp b/trimoligos.cpp index 0f2a9cb..c7016d8 100644 --- a/trimoligos.cpp +++ b/trimoligos.cpp @@ -800,6 +800,14 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality if(minDiff > bdiffs) { success = minDiff; } //no good matches else { + if (m->debug) { + string fMatches = ""; + for (int i = 0; i < minFGroup.size(); i++) { for (int j = 0; j < minFGroup[i].size(); j++) { fMatches += toString(minFGroup[i][j]) + " "; } } + m->mothurOut("[DEBUG]: forward matches = " + fMatches + "\n"); + string rMatches = ""; + for (int i = 0; i < minRGroup.size(); i++) { for (int j = 0; j < minRGroup[i].size(); j++) { rMatches += toString(minRGroup[i][j]) + " "; } } + m->mothurOut("[DEBUG]: reverse matches = " + rMatches + "\n"); + } bool foundMatch = false; vector matches; for (int i = 0; i < minFGroup.size(); i++) { @@ -817,6 +825,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality group = matches[0]; fStart = minFPos[0]; rStart = minRPos[0]; + if (m->debug) { m->mothurOut("[DEBUG]: found match = " + toString(matches[0]) + ".\n"); } + }else { + if (m->debug) { m->mothurOut("[DEBUG]: failed too many matches.\n"); } } //we have an acceptable match for the forward and reverse, but do they match? @@ -1065,6 +1076,222 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou exit(1); } +} +//*******************************************************************/ +int TrimOligos::stripPairedBarcode(Sequence& seq, int& group){ + try { + //look for forward barcode + string rawSeq = seq.getUnaligned(); + int success = bdiffs + 1; //guilty until proven innocent + //cout << seq.getName() << endl; + //can you find the forward barcode + for(map::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){ + string foligo = it->second.forward; + string roligo = it->second.reverse; + //cout << it->first << '\t' << foligo << '\t' << roligo << endl; + //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl; + if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { + group = it->first; + string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode + seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode + success = 0; + break; + } + } + //cout << "success=" << success << endl; + //if you found the barcode or if you don't want to allow for diffs + if ((bdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; + + //the pair can have duplicates, but we only want to search for the unique forward and reverses, example + /* + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + */ + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){ + string oligo = it->first; + + if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + + if (alnLength == 0) { numDiff = bdiffs + 100; } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); + minFGroup.push_back(it->second); + int tempminFPos = 0; + minFPos.clear(); + for(int i=0;isecond); + int tempminFPos = 0; + for(int i=0;i bdiffs) { success = minDiff; } //no good matches + else{ + //check for reverse match + if (alignment != NULL) { delete alignment; } + + if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + minDiff = 1e6; + minCount = 1; + vector< vector > minRGroup; + vector minRPos; + + string rawRSequence = reverseOligo(seq.getUnaligned()); + //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl; + for(map >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){ + string oligo = reverseOligo(it->first); + //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl; + if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length + success = bdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + if (alnLength == 0) { numDiff = bdiffs + 100; } + + //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl; + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minRGroup.clear(); + minRGroup.push_back(it->second); + int tempminRPos = 0; + minRPos.clear(); + for(int i=0;isecond); + } + + } + + if(minDiff > bdiffs) { success = minDiff; } //no good matches + else { + bool foundMatch = false; + vector matches; + for (int i = 0; i < minFGroup.size(); i++) { + for (int j = 0; j < minFGroup[i].size(); j++) { + for (int k = 0; k < minRGroup.size(); k++) { + if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); } + } + } + } + + int fStart = 0; + int rStart = 0; + if (matches.size() == 1) { + foundMatch = true; + group = matches[0]; + fStart = minFPos[0]; + rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence + } + + //we have an acceptable match for the forward and reverse, but do they match? + if (foundMatch) { + string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode + seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode + success = minDiff; + //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl; + }else { success = bdiffs + 100; } + } + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripPairedBarcode"); + exit(1); + } + } //*******************************************************************/ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){ @@ -1295,7 +1522,223 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou } } - +//*******************************************************************/ +int TrimOligos::stripPairedPrimers(Sequence& seq, int& group){ + try { + //look for forward + string rawSeq = seq.getUnaligned(); + int success = pdiffs + 1; //guilty until proven innocent + //cout << seq.getName() << endl; + //can you find the forward + for(map::iterator it=ipprimers.begin();it!=ipprimers.end();it++){ + string foligo = it->second.forward; + string roligo = it->second.reverse; + + //cout << it->first << '\t' << foligo << '\t' << roligo << endl; + //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl; + if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out + break; + } + + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { + group = it->first; + string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode + seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode + success = 0; + break; + } + } + //cout << "success=" << success << endl; + //if you found the barcode or if you don't want to allow for diffs + if ((pdiffs == 0) || (success == 0)) { return success; } + else { //try aligning and see if you can find it + Alignment* alignment; + if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + int minDiff = 1e6; + int minCount = 1; + vector< vector > minFGroup; + vector minFPos; + + //the pair can have duplicates, but we only want to search for the unique forward and reverses, example + /* + 1 Sarah Westcott + 2 John Westcott + 3 Anna Westcott + 4 Sarah Schloss + 5 Pat Schloss + 6 Gail Brown + 7 Pat Moore + only want to look for forward = Sarah, John, Anna, Pat, Gail + reverse = Westcott, Schloss, Brown, Moore + but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group. + */ + //cout << endl << forwardSeq.getName() << endl; + for(map >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){ + string oligo = it->first; + + if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl; + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + + if (alnLength == 0) { numDiff = pdiffs + 100; } + //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl; + + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minFGroup.clear(); + minFGroup.push_back(it->second); + int tempminFPos = 0; + minFPos.clear(); + for(int i=0;isecond); + int tempminFPos = 0; + for(int i=0;i pdiffs) { success = minDiff; } //no good matches + else{ + //check for reverse match + if (alignment != NULL) { delete alignment; } + + if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); } + else{ alignment = NULL; } + + //can you find the barcode + minDiff = 1e6; + minCount = 1; + vector< vector > minRGroup; + vector minRPos; + + string rawRSequence = reverseOligo(seq.getUnaligned()); + + for(map >::iterator it=irprimers.begin();it!=irprimers.end();it++){ + string oligo = reverseOligo(it->first); + //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl; + if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length + success = pdiffs + 10; + break; + } + + //use needleman to align first barcode.length()+numdiffs of sequence to each barcode + alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs)); + oligo = alignment->getSeqAAln(); + string temp = alignment->getSeqBAln(); + + int alnLength = oligo.length(); + for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } } + oligo = oligo.substr(0,alnLength); + temp = temp.substr(0,alnLength); + int numDiff = countDiffs(oligo, temp); + if (alnLength == 0) { numDiff = pdiffs + 100; } + + //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl; + if(numDiff < minDiff){ + minDiff = numDiff; + minCount = 1; + minRGroup.clear(); + minRGroup.push_back(it->second); + int tempminRPos = 0; + minRPos.clear(); + for(int i=0;isecond); + } + + } + + if(minDiff > pdiffs) { success = minDiff; } //no good matches + else { + bool foundMatch = false; + vector matches; + for (int i = 0; i < minFGroup.size(); i++) { + for (int j = 0; j < minFGroup[i].size(); j++) { + for (int k = 0; k < minRGroup.size(); k++) { + if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); } + } + } + } + + int fStart = 0; + int rStart = 0; + if (matches.size() == 1) { + foundMatch = true; + group = matches[0]; + fStart = minFPos[0]; + rStart = rawSeq.length() - minRPos[0]; + if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence + } + + //we have an acceptable match for the forward and reverse, but do they match? + if (foundMatch) { + string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode + seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode + success = minDiff; + //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl; + }else { success = pdiffs + 100; } + } + } + + if (alignment != NULL) { delete alignment; } + } + + return success; + + } + catch(exception& e) { + m->errorOut(e, "TrimOligos", "stripPairedPrimers"); + exit(1); + } + +} //*******************************************************************/ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){ try { @@ -1733,6 +2176,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr int TrimOligos::stripBarcode(Sequence& seq, int& group){ try { + if (paired) { int success = stripPairedBarcode(seq, group); return success; } + string rawSequence = seq.getUnaligned(); int success = bdiffs + 1; //guilty until proven innocent @@ -1836,6 +2281,8 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){ int TrimOligos::stripForward(Sequence& seq, int& group){ try { + if (paired) { int success = stripPairedPrimers(seq, group); return success; } + string rawSequence = seq.getUnaligned(); int success = pdiffs + 1; //guilty until proven innocent diff --git a/trimoligos.h b/trimoligos.h index a86eb9a..5f4f9f0 100644 --- a/trimoligos.h +++ b/trimoligos.h @@ -15,14 +15,6 @@ #include "sequence.hpp" #include "qualityscores.h" -struct oligosPair { - string forward; - string reverse; - - oligosPair() { forward = ""; reverse = ""; } - oligosPair(string f, string r) : forward(f), reverse(r) {} - ~oligosPair() {} -}; class TrimOligos { @@ -82,6 +74,9 @@ class TrimOligos { int stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group); int stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool); + int stripPairedBarcode(Sequence& seq, int& group); + int stripPairedPrimers(Sequence& seq, int& group); + }; #endif diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index de607c6..0607330 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -12,6 +12,7 @@ #include "trimoligos.h" + //********************************************************************************************************************** vector TrimSeqsCommand::setParameters(){ try { @@ -65,7 +66,7 @@ string TrimSeqsCommand::getHelpString(){ helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n"; helpString += "The fasta parameter is required.\n"; helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"; - helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n"; helpString += "The oligos parameter allows you to provide an oligos file.\n"; helpString += "The name parameter allows you to provide a names file with your fasta file.\n"; helpString += "The count parameter allows you to provide a count file with your fasta file.\n"; @@ -385,6 +386,7 @@ int TrimSeqsCommand::execute(){ vector > fastaFileNames; vector > qualFileNames; vector > nameFileNames; + map uniqueFastaNames;// so we don't add the same groupfile multiple times map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); @@ -440,7 +442,7 @@ int TrimSeqsCommand::execute(){ string outputGroupFileName; if(oligoFile != ""){ - createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames); + createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames, uniqueFastaNames); if ((createGroup) && (countfile == "")){ map myvariables; myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); @@ -464,7 +466,6 @@ int TrimSeqsCommand::execute(){ if (m->control_pressed) { return 0; } if(allFiles){ - map uniqueFastaNames;// so we don't add the same groupfile multiple times map::iterator it; set namesToRemove; for(int i=0;imothurRemove(nameFileNames[i][j]); namesToRemove.insert(nameFileNames[i][j]); } - }else{ - it = uniqueFastaNames.find(fastaFileNames[i][j]); - if (it == uniqueFastaNames.end()) { - uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; - } + uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print } } } @@ -679,40 +676,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int count = 0; bool moreSeqs = 1; - int numBarcodes = barcodes.size(); TrimOligos* trimOligos = NULL; - if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); } - else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); } + if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } TrimOligos* rtrimOligos = NULL; if (reorient) { - //create reoriented primer and barcode pairs - map rpairedPrimers, rpairedBarcodes; - for (map::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { - oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer - rpairedPrimers[it->first] = tempPair; - //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl; - } - for (map::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { - oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode - rpairedBarcodes[it->first] = tempPair; - //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl; - } - int index = rpairedBarcodes.size(); - for (map::iterator it = barcodes.begin(); it != barcodes.end(); it++) { - oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode - rpairedBarcodes[index] = tempPair; index++; - //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl; - } - - index = rpairedPrimers.size(); - for (map::iterator it = primers.begin(); it != primers.end(); it++) { - oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode - rpairedPrimers[index] = tempPair; index++; - //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl; - } - - rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); } while (moreSeqs) { @@ -871,20 +841,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(trashCode.length() == 0){ string thisGroup = ""; - if (createGroup) { - if(numBarcodes != 0){ - thisGroup = barcodeNameVector[barcodeIndex]; - if (numFPrimers != 0) { - if (primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + primerNameVector[primerIndex]; - }else { - thisGroup = primerNameVector[primerIndex]; - } - } - } - } - } + if (createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); } int pos = thisGroup.find("ignore"); if (pos == string::npos) { @@ -1189,8 +1146,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName tempPrimerQualFileNames, tempNameFileNames, lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m, - pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos, - primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast, + pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, oligoFile, + createGroup, allFiles, keepforward, keepFirst, removeLast, qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount); pDataArray.push_back(tempTrim); @@ -1501,9 +1458,201 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) { //*************************************************************************************************************** -bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector >& qualFileNames, vector >& nameFileNames){ +bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector >& qualFileNames, vector >& nameFileNames, map& fastaFile2Group){ try { - ifstream inOligos; + + bool allBlank = false; + oligos.read(oligoFile); + + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + + vector groupNames = oligos.getGroupNames(); + if (groupNames.size() == 0) { allFiles = 0; allBlank = true; } + + + fastaFileNames.resize(oligos.getBarcodeNames().size()); + for(int i=0;i uniqueNames; //used to cleanup outputFileNames + if (pairedOligos) { + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + fastaFile2Group[fastaFileName] = comboGroupName; + } + + fastaFileNames[itBar->first][itPrimer->first] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->first][itPrimer->first] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->first][itPrimer->first] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } + } + } + }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile)); + variables["[tag]"] = comboGroupName; + fastaFileName = getOutputFileName("fasta", variables); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + fastaFile2Group[fastaFileName] = comboGroupName; + } + + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; + m->openOutputFile(fastaFileName, temp); temp.close(); + + if(qFileName != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName)); + qualFileName = getOutputFileName("qfile", variables); + if (uniqueNames.count(qualFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + + qualFileNames[itBar->second][itPrimer->second] = qualFileName; + m->openOutputFile(qualFileName, temp); temp.close(); + } + + if(nameFile != ""){ + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile)); + nameFileName = getOutputFileName("name", variables); + if (uniqueNames.count(nameFileName) == 0) { + outputNames.push_back(nameFileName); + outputTypes["name"].push_back(nameFileName); + } + + nameFileNames[itBar->second][itPrimer->second] = nameFileName; + m->openOutputFile(nameFileName, temp); temp.close(); + } + } + } + } + } + + } + + + + if (allBlank) { + m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); + allFiles = false; + return false; + } + + + + /*ifstream inOligos; m->openInputFile(oligoFile, inOligos); ofstream test; @@ -1520,7 +1669,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< while(!inOligos.eof()){ - inOligos >> type; + inOligos >> type; if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } @@ -1825,33 +1974,8 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } } } - numFPrimers = primers.size(); - if (pairedOligos) { numFPrimers = pairedPrimers.size(); } - numRPrimers = revPrimer.size(); - numLinkers = linker.size(); - numSpacers = spacer.size(); - - bool allBlank = true; - for (int i = 0; i < barcodeNameVector.size(); i++) { - if (barcodeNameVector[i] != "") { - allBlank = false; - break; - } - } - for (int i = 0; i < primerNameVector.size(); i++) { - if (primerNameVector[i] != "") { - allBlank = false; - break; - } - } - - if (allBlank) { - m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine(); - allFiles = false; - return false; - } - - return true; + */ + return true; } catch(exception& e) { @@ -1951,46 +2075,6 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){ } } -//********************************************************************/ -string TrimSeqsCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } - - else { reverse += 'N'; } - } - - - return reverse; - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "reverseOligo"); - exit(1); - } -} //*************************************************************************************************************** diff --git a/trimseqscommand.h b/trimseqscommand.h index 14eed17..2448669 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -16,6 +16,7 @@ #include "qualityscores.h" #include "trimoligos.h" #include "counttable.h" +#include "oligos.h" class TrimSeqsCommand : public Command { @@ -44,34 +45,22 @@ private: linePair() {} }; - bool getOligos(vector >&, vector >&, vector >&); + Oligos oligos; + bool getOligos(vector >&, vector >&, vector >&, map&); bool keepFirstTrim(Sequence&, QualityScores&); bool removeLastTrim(Sequence&, QualityScores&); bool cullLength(Sequence&); bool cullHomoP(Sequence&); bool cullAmbigs(Sequence&); - string reverseOligo(string); - bool abort, createGroup; string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir; bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform; - int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; + int numBarcodes, numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; int qWindowSize, qWindowStep, keepFirst, removeLast; double qRollAverage, qThreshold, qWindowAverage, qAverage; - vector revPrimer, outputNames; set filesToRemove; - map pairedBarcodes; - map pairedPrimers; - map barcodes; - vector groupVector; - map primers; - vector linker; - vector spacer; - map combos; - map groupToIndex; - vector primerNameVector; //needed here? - vector barcodeNameVector; //needed here? + vector groupVector,outputNames; map groupCounts; map nameMap; map nameCount; //for countfile name -> repCount @@ -93,34 +82,23 @@ private: struct trimData { unsigned long long start, end; MothurOut* m; - string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile; + string filename, qFileName, oligosfile, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile; vector > fastaFileNames; vector > qualFileNames; vector > nameFileNames; unsigned long long lineStart, lineEnd, qlineStart, qlineEnd; - bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform; - int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; + bool flip, allFiles, qtrim, keepforward, createGroup, reorient, logtransform; + int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; int qWindowSize, qWindowStep, keepFirst, removeLast, count; double qRollAverage, qThreshold, qWindowAverage, qAverage; - vector revPrimer; - map barcodes; - map primers; map nameCount; - vector linker; - vector spacer; - map combos; - vector primerNameVector; - vector barcodeNameVector; map groupCounts; map nameMap; map groupMap; - map pairedBarcodes; - map pairedPrimers; trimData(){} trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector > ffn, vector > qfn, vector > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout, - int pd, int bd, int ld, int sd, int td, map pri, map bar, vector revP, vector li, vector spa, map pbr, map ppr, bool po, - vector priNameVector, vector barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, + int pd, int bd, int ld, int sd, int td, string o, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt, int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map nm, map ncount) { filename = fn; @@ -145,22 +123,13 @@ struct trimData { qlineEnd = qend; m = mout; nameCount = ncount; + oligosfile = o; pdiffs = pd; bdiffs = bd; ldiffs = ld; sdiffs = sd; tdiffs = td; - barcodes = bar; - pairedPrimers = ppr; - pairedBarcodes = pbr; - pairedOligos = po; - primers = pri; numFPrimers = primers.size(); - revPrimer = revP; numRPrimers = revPrimer.size(); - linker = li; numLinkers = linker.size(); - spacer = spa; numSpacers = spacer.size(); - primerNameVector = priNameVector; - barcodeNameVector = barNameVector; createGroup = cGroup; allFiles = aFiles; @@ -260,37 +229,30 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } } + int numBarcodes, numLinkers, numSpacers, numRPrimers, numFPrimers; + bool pairedOligos; + Oligos oligos(pDataArray->oligosfile); + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + TrimOligos* trimOligos = NULL; - int numBarcodes = pDataArray->barcodes.size(); - if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); } - else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); } + if (pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } TrimOligos* rtrimOligos = NULL; if (pDataArray->reorient) { - //create reoriented primer and barcode pairs - map rpairedPrimers, rpairedBarcodes; - for (map::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) { - oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer - rpairedPrimers[it->first] = tempPair; - } - for (map::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) { - oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode - rpairedBarcodes[it->first] = tempPair; - } - - int index = rpairedBarcodes.size(); - for (map::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) { - oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode - rpairedBarcodes[index] = tempPair; index++; - } - - index = rpairedPrimers.size(); - for (map::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) { - oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode - rpairedPrimers[index] = tempPair; index++; - } - - rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); + rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); } pDataArray->count = 0; @@ -329,7 +291,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int barcodeIndex = 0; int primerIndex = 0; - if(pDataArray->numLinkers != 0){ + if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq, currQual); if(success > pDataArray->ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } @@ -341,14 +303,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ else{ currentSeqsDiffs += success; } } - if(pDataArray->numSpacers != 0){ + if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq, currQual); if(success > pDataArray->sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } - if(pDataArray->numFPrimers != 0){ + if(numFPrimers != 0){ success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward); if(success > pDataArray->pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } @@ -356,7 +318,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; } - if(pDataArray->numRPrimers != 0){ + if(numRPrimers != 0){ success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } @@ -375,7 +337,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ else{ thisCurrentSeqsDiffs += thisSuccess; } } - if(pDataArray->numFPrimers != 0){ + if(numFPrimers != 0){ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward); if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; } else{ thisCurrentSeqsDiffs += thisSuccess; } @@ -478,20 +440,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ if(trashCode.length() == 0){ string thisGroup = ""; - if (pDataArray->createGroup) { - if(numBarcodes != 0){ - thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; - if (pDataArray->numFPrimers != 0) { - if (pDataArray->primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + pDataArray->primerNameVector[primerIndex]; - }else { - thisGroup = pDataArray->primerNameVector[primerIndex]; - } - } - } - } - } + if (pDataArray->createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); } int pos = thisGroup.find("ignore"); if (pos == string::npos) { -- 2.39.2