219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */; };
219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; };
483C952E188F0CAD0035E7B7 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */; };
+ 4893DE2918EEF28100C615DF /* oligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4893DE2818EEF28100C615DF /* oligos.cpp */; };
48A85BAD18E1AF2000199B6F /* getmimarkspackagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A85BAC18E1AF2000199B6F /* getmimarkspackagecommand.cpp */; };
48E981CF189C38FB0042BE9D /* mergesfffilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E981CE189C38FB0042BE9D /* mergesfffilecommand.cpp */; };
7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
219C1DE51559BCF2004209F9 /* getcoremicrobiomecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcoremicrobiomecommand.h; sourceTree = "<group>"; };
483C952C188F0C960035E7B7 /* sharedrjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedrjsd.h; sourceTree = "<group>"; };
483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedrjsd.cpp; sourceTree = "<group>"; };
+ 4893DE2718EEF27300C615DF /* oligos.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = oligos.h; sourceTree = "<group>"; };
+ 4893DE2818EEF28100C615DF /* oligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = oligos.cpp; sourceTree = "<group>"; };
48A85BAB18E1AF0600199B6F /* getmimarkspackagecommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = getmimarkspackagecommand.h; sourceTree = "<group>"; };
48A85BAC18E1AF2000199B6F /* getmimarkspackagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getmimarkspackagecommand.cpp; sourceTree = "<group>"; };
48E981CD189C38E60042BE9D /* mergesfffilecommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = mergesfffilecommand.h; sourceTree = "<group>"; };
A7E9B74012D37EC400DA6239 /* listvector.hpp */,
A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */,
A7E9B76012D37EC400DA6239 /* nameassignment.hpp */,
+ 4893DE2718EEF27300C615DF /* oligos.h */,
+ 4893DE2818EEF28100C615DF /* oligos.cpp */,
A7E9B77712D37EC400DA6239 /* ordervector.cpp */,
A7E9B77812D37EC400DA6239 /* ordervector.hpp */,
A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */,
A7E9B8DC12D37EC400DA6239 /* gotohoverlap.cpp in Sources */,
A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */,
A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */,
+ 4893DE2918EEF28100C615DF /* oligos.cpp in Sources */,
A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */,
A7E9B8E012D37EC400DA6239 /* hcluster.cpp in Sources */,
A7E9B8E112D37EC400DA6239 /* hclustercommand.cpp in Sources */,
//**********************************************************************************************************************
AlignCommand::AlignCommand(string option) {
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
try {
int num = 0;
processIDS.resize(0);
+ bool recalc = false;
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
exit(0);
}else {
- m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
- for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
- exit(0);
+ m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ recalc = true;
+ break;
}
}
+ if (recalc) {
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ vector<unsigned long long> positions;
+ positions = m->divideFile(filename, processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
+
+ num = 0;
+ processIDS.resize(0);
+ process = 1;
+
+ while (process != processors) {
+ pid_t pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+ }
+
//do my part
num = driver(lines[0], alignFileName, reportFileName, accnosFName, filename);
#include "getmimarkspackagecommand.h"
#include "groupmap.h"
+
//**********************************************************************************************************************
vector<string> GetMIMarksPackageCommand::setParameters(){
try {
outputTypes["tsv"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- if (oligosfile != "") { readOligos(); }
+ if (oligosfile != "") { Oligos oligos(oligosfile); Groups = oligos.getGroupNames(); }
else if (file != "") { readFile(); }
else { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); }
- for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
-
if (outputDir == "") { outputDir += m->hasPath(inputfile); }
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
}
}
//***************************************************************************************************************
-int GetMIMarksPackageCommand::readOligos(){
- try {
- ifstream inOligos;
- m->openInputFile(oligosfile, inOligos);
-
- string type, oligo, roligo, group;
- vector<string> primerNameVector, barcodeNameVector;
- set<string> uniquePrimers;
- set<string> uniqueBarcodes;
-
- while(!inOligos.eof()){
-
- inOligos >> type;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-
- if(type[0] == '#'){
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(inOligos);
- }
- else{
- m->gobble(inOligos);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- primerNameVector.push_back(group);
- }
- else if (type == "PRIMER"){
- m->gobble(inOligos);
-
- inOligos >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
-
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- primerNameVector.push_back(group);
- }else if(type == "BARCODE"){
- inOligos >> group;
-
- //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
- //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
-
- string temp = "";
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { temp += c; }
- }
-
- //then this is illumina data with 4 columns
- if (temp != "") {
-
- string reverseBarcode = group; //reverseOligo(group); //reverse barcode
- group = temp;
-
- barcodeNameVector.push_back(group);
- }else {
- barcodeNameVector.push_back(group);
- }
- }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- //add in potential combos
- if(barcodeNameVector.size() == 0){
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- primerNameVector.push_back("");
- }
-
-
- for(int i = 0; i < barcodeNameVector.size(); i++){
- for(int j = 0; j < primerNameVector.size(); j++){
-
- string primerName = primerNameVector[j];
- string barcodeName = barcodeNameVector[i];
-
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else if ((primerName == "") && (barcodeName == "")) { }
- else {
- string comboGroupName = "";
-
- if(primerName == ""){
- comboGroupName = barcodeNameVector[i];
- }
- else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[j];
- }
- else{
- comboGroupName = barcodeNameVector[i] + "." + primerNameVector[j];
- }
- }
- uniqueNames.insert(comboGroupName);
- }
- }
- }
-
-
-
- if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
-
-
- return true;
-
- }
- catch(exception& e) {
- m->errorOut(e, "GetMIMarksPackageCommand", "readOligos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
+
// going to have to rework this to allow for other options --
/*
file option 1
int GetMIMarksPackageCommand::readFile(){
try {
- //vector<string> theseFiles;
+ Oligos oligos;
inputfile = file;
ifstream in;
if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
+ if (inputDir != "") {
+ string path = m->hasPath(thisFileName2);
+ if (path == "") { thisFileName2 = inputDir + thisFileName2; }
+
+ path = m->hasPath(thisFileName1);
+ if (path == "") { thisFileName1 = inputDir + thisFileName1; }
+ }
+
//check to make sure both are able to be opened
ifstream in2;
int openForward = m->openInputFile(thisFileName1, in2, "noerror");
if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
oligosfile = thisFileName2;
if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
- readOligos();
+ oligos.read(oligosfile);
}else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
Groups.push_back(group);
}
}
in.close();
+ Groups = oligos.getGroupNames();
+
inputfile = file;
return 0;
#define Mothur_getmimarkspackagecommand_h
#include "command.hpp"
+#include "oligos.h"
/**************************************************************************************************/
private:
bool abort, requiredonly;
- string oligosfile, groupfile, package, inputfile, file;
+ string oligosfile, groupfile, package, inputfile, file, inputDir;
string outputDir;
vector<string> outputNames, Groups;
- set<string> uniqueNames;
- int readOligos();
int readFile();
};
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
-
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
//helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n";
//if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
string path;
abort=true;
}
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
+ reorient = m->isTrue(temp);
+
//fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
for (int i = -64; i < 65; i++) {
char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
groupCounts.clear();
groupMap.clear();
vector<vector<string> > fastaFileNames;
+ map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
createOligosGroup = false;
+ oligos = new Oligos();
+ numBarcodes = 0; numFPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
string outputGroupFileName;
map<string, string> variables;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
variables["[tag]"] = "";
- if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); }
+ if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"], uniqueFastaNames); }
if (createOligosGroup || createFileGroup) {
outputGroupFileName = getOutputFileName("group",variables);
}
//remove temp fasta and qual files
for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } }
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete oligos; return 0; }
if(allFiles){
- map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
+ // so we don't add the same groupfile multiple times
map<string, string>::iterator it;
set<string> namesToRemove;
for(int i=0;i<fastaFileNames.size();i++){
if(m->isBlank(fastaFileNames[i][j])){
m->mothurRemove(fastaFileNames[i][j]);
namesToRemove.insert(fastaFileNames[i][j]);
- }else{
- it = uniqueFastaNames.find(fastaFileNames[i][j]);
- if (it == uniqueFastaNames.end()) {
- uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
- }
+ uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print
}
}
}
m->openInputFile(it->first, in);
ofstream out;
- string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
- thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
+ string thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
m->openOutputFile(thisGroupName, out);
while (!in.eof()){
}
}
m->mothurOut("Done.\n");
+ delete oligos;
}
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
}
}
- contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
+ contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, tempFASTAFileNames, oligosfile, reorient, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
pDataArray.push_back(tempcontig);
hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
m->openOutputFile(outputMisMatches, outMisMatch);
outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";
- TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
+ TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, oligos->getPairedPrimers(), oligos->getPairedBarcodes());
+
+ TrimOligos* rtrimOligos = NULL;
+ if (reorient) {
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos->getReorientedPairedPrimers(), oligos->getReorientedPairedBarcodes()); numBarcodes = oligos->getReorientedPairedBarcodes().size();
+ }
while ((!inFFasta.eof()) && (!inRFasta.eof())) {
int barcodeIndex = 0;
int primerIndex = 0;
+ Sequence savedFSeq(fSeq.getName(), fSeq.getAligned()); Sequence savedRSeq(rSeq.getName(), rSeq.getAligned());
+ Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned());
+ QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL;
+ if (thisfqualfile != "") {
+ savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores());
+ savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores());
+ }
- if(barcodes.size() != 0){
+ if(numBarcodes != 0){
if (thisfqualfile != "") {
if ((thisfindexfile != "") || (thisrindexfile != "")) {
success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
else{ currentSeqsDiffs += success; }
}
- if(primers.size() != 0){
+ if(numFPrimers != 0){
if (thisfqualfile != "") {
success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
}else {
if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+ int thisSuccess = 0;
+ string thisTrashCode = "";
+ int thisCurrentSeqsDiffs = 0;
+
+ int thisBarcodeIndex = 0;
+ int thisPrimerIndex = 0;
+
+ if(numBarcodes != 0){
+ if (thisfqualfile != "") {
+ if ((thisfindexfile != "") || (thisrindexfile != "")) {
+ thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex);
+ }else {
+ thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex);
+ }
+ }else {
+ thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex);
+ }
+ if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if(numFPrimers != 0){
+ if (thisfqualfile != "") {
+ thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex);
+ }else {
+ thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex);
+ }
+ if(thisSuccess > pdiffs) { thisTrashCode += 'f'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
+
+ if (thisTrashCode == "") {
+ trashCode = thisTrashCode;
+ success = thisSuccess;
+ currentSeqsDiffs = thisCurrentSeqsDiffs;
+ barcodeIndex = thisBarcodeIndex;
+ primerIndex = thisPrimerIndex;
+ savedFSeq.reverseComplement();
+ savedRSeq.reverseComplement();
+ fSeq.setAligned(savedFSeq.getAligned());
+ rSeq.setAligned(savedRSeq.getAligned());
+ if(thisfqualfile != ""){
+ savedFQual->flipQScores(); savedRQual->flipQScores();
+ fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores());
+ }
+ }else { trashCode += "(" + thisTrashCode + ")"; }
+ }
+
+
//flip the reverse reads
rSeq.reverseComplement();
if (thisfqualfile != "") { rQual->flipQScores(); }
if (thisfqualfile != "") {
scores1 = fQual->getQualityScores();
scores2 = rQual->getQualityScores();
- delete fQual; delete rQual;
+ delete fQual; delete rQual; delete savedFQual; delete savedRQual;
}
// if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
if (m->debug) { m->mothurOut(fSeq.getName()); }
if (createOligosGroup) {
- if(barcodes.size() != 0){
- string thisGroup = barcodeNameVector[barcodeIndex];
- if (primers.size() != 0) {
- if (primerNameVector[primerIndex] != "") {
- if(thisGroup != "") {
- thisGroup += "." + primerNameVector[primerIndex];
- }else {
- thisGroup = primerNameVector[primerIndex];
- }
- }
- }
-
+ string thisGroup = oligos->getGroupName(barcodeIndex, primerIndex);
if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
int pos = thisGroup.find("ignore");
if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
else { groupCounts[it->first] ++; }
}else { ignore = true; }
-
- }
}else if (createFileGroup) {
int pos = group.find("ignore");
if (pos == string::npos) {
}else { ignore = true; }
}
if (m->debug) { m->mothurOut("\n"); }
-
+
if(allFiles && !ignore){
ofstream output;
m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
inRQual.close();
}
delete alignment;
+ if (reorient) { delete rtrimOligos; }
if (m->control_pressed) { m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches); }
if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
+ if (inputDir != "") {
+ string path = m->hasPath(forward);
+ if (path == "") { forward = inputDir + forward; }
+
+ path = m->hasPath(reverse);
+ if (path == "") { reverse = inputDir + reverse; }
+
+ if (findex != "") {
+ path = m->hasPath(findex);
+ if (path == "") { findex = inputDir + findex; }
+ }
+
+ if (rindex != "") {
+ path = m->hasPath(rindex);
+ if (path == "") { rindex = inputDir + rindex; }
+ }
+ }
+
//check to make sure both are able to be opened
ifstream in2;
int openForward = m->openInputFile(forward, in2, "noerror");
//illumina data requires paired forward and reverse data
//BARCODE atgcatgc atgcatgc groupName
//PRIMER atgcatgc atgcatgc groupName
-//PRIMER atgcatgc atgcatgc
-bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
+//PRIMER atgcatgc atgcatgc
+bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname, map<string, string>& fastaFile2Group){
try {
- ifstream in;
- m->openInputFile(oligosfile, in);
-
- ofstream test;
-
- string type, foligo, roligo, group;
+ if (m->debug) { m->mothurOut("[DEBUG]: oligosfile = " + oligosfile + "\n"); }
- int indexPrimer = 0;
- int indexBarcode = 0;
- set<string> uniquePrimers;
- set<string> uniqueBarcodes;
-
- while(!in.eof()){
-
- in >> type;
+ bool allBlank = false;
+ oligos->read(oligosfile, false);
+
+ if (m->control_pressed) { return false; } //error in reading oligos
+
+ if (oligos->hasPairedBarcodes()) {
+ numFPrimers = oligos->getPairedPrimers().size();
+ numBarcodes = oligos->getPairedBarcodes().size();
+ }else {
+ m->mothurOut("[ERROR]: make.contigs requires paired barcodes and primers. You can set one end to NONE if you are using an index file.\n"); m->control_pressed = true;
+ }
- if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-
- if(type[0] == '#'){
- while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(in);
- }
- else{
- m->gobble(in);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- in >> foligo;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
-
- for(int i=0;i<foligo.length();i++){
- foligo[i] = toupper(foligo[i]);
- if(foligo[i] == 'U') { foligo[i] = 'T'; }
- }
-
- if(type == "PRIMER"){
- m->gobble(in);
-
- in >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
- //roligo = reverseOligo(roligo);
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
-
- group = "";
-
- // get rest of line in case there is a primer name
- while (!in.eof()) {
- char c = in.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- oligosPair newPrimer(foligo, roligo);
-
- if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
-
- //check for repeat barcodes
- string tempPair = foligo+roligo;
- if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
- else { uniquePrimers.insert(tempPair); }
-
- if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
- primers[indexPrimer]=newPrimer; indexPrimer++;
- primerNameVector.push_back(group);
- }else if(type == "BARCODE"){
- m->gobble(in);
-
- in >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
- //roligo = reverseOligo(roligo);
-
- oligosPair newPair(foligo, roligo);
-
- if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
-
- group = "";
- while (!in.eof()) {
- char c = in.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
-
- //check for repeat barcodes
- string tempPair = foligo+roligo;
- if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
- else { uniqueBarcodes.insert(tempPair); }
-
- barcodes[indexBarcode]=newPair; indexBarcode++;
- barcodeNameVector.push_back(group);
- }else if(type == "LINKER"){
- linker.push_back(foligo);
- m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
- }else if(type == "SPACER"){
- spacer.push_back(foligo);
- m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
- }
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
- }
- m->gobble(in);
- }
- in.close();
-
- if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
-
- //add in potential combos
- if(barcodeNameVector.size() == 0){
- oligosPair temp("", "");
- barcodes[0] = temp;
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- oligosPair temp("", "");
- primers[0] = temp;
- primerNameVector.push_back("");
- }
-
- fastaFileNames.resize(barcodeNameVector.size());
+ if (m->control_pressed) { return false; }
+
+ numLinkers = oligos->getLinkers().size();
+ numSpacers = oligos->getSpacers().size();
+ numRPrimers = oligos->getReversePrimers().size();
+ if (numLinkers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); }
+ if (numSpacers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n"); }
+
+ vector<string> groupNames = oligos->getGroupNames();
+ if (groupNames.size() == 0) { allFiles = 0; allBlank = true; }
+
+
+ fastaFileNames.resize(oligos->getBarcodeNames().size());
for(int i=0;i<fastaFileNames.size();i++){
- fastaFileNames[i].assign(primerNameVector.size(), "");
+ for(int j=0;j<oligos->getPrimerNames().size();j++){ fastaFileNames[i].push_back(""); }
}
-
- if(allFiles){
- set<string> uniqueNames; //used to cleanup outputFileNames
- for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
- for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-
- string primerName = primerNameVector[itPrimer->first];
- string barcodeName = barcodeNameVector[itBar->first];
+
+ if (allFiles) {
+ set<string> uniqueNames; //used to cleanup outputFileNames
+ map<int, oligosPair> barcodes = oligos->getPairedBarcodes();
+ map<int, oligosPair> primers = oligos->getPairedPrimers();
+ for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else {
+ string primerName = oligos->getPrimerName(itPrimer->first);
+ string barcodeName = oligos->getBarcodeName(itBar->first);
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
string comboGroupName = "";
string fastaFileName = "";
string qualFileName = "";
string countFileName = "";
if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->first];
- }
- else{
+ comboGroupName = barcodeName;
+ }else{
if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->first];
+ comboGroupName = primerName;
}
else{
- comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+ comboGroupName = barcodeName + "." + primerName;
}
}
ofstream temp;
- fastaFileName = rootname + comboGroupName + ".fasta";
+ map<string, string> variables;
+ variables["[filename]"] = rootname;
+ variables["[tag]"] = comboGroupName;
+ fastaFileName = getOutputFileName("fasta", variables);
if (uniqueNames.count(fastaFileName) == 0) {
outputNames.push_back(fastaFileName);
outputTypes["fasta"].push_back(fastaFileName);
uniqueNames.insert(fastaFileName);
+ fastaFile2Group[fastaFileName] = comboGroupName;
}
fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
m->openOutputFile(fastaFileName, temp); temp.close();
+ cout << fastaFileName << endl;
}
- }
- }
- }
-
- bool allBlank = true;
- for (int i = 0; i < barcodeNameVector.size(); i++) {
- if (barcodeNameVector[i] != "") {
- allBlank = false;
- break;
- }
- }
- for (int i = 0; i < primerNameVector.size(); i++) {
- if (primerNameVector[i] != "") {
- allBlank = false;
- break;
- }
- }
-
- if (allBlank) {
- m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
- allFiles = false;
- return false;
- }
-
- return true;
-
+ }
+ }
+ }
+
+ if (allBlank) {
+ m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
+ allFiles = false;
+ return false;
+ }
+
+ return true;
+
}
catch(exception& e) {
m->errorOut(e, "MakeContigsCommand", "getOligos");
exit(1);
}
}
-//********************************************************************/
-string MakeContigsCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
-
- else { reverse += 'N'; }
- }
-
-
- return reverse;
- }
- catch(exception& e) {
- m->errorOut(e, "MakeContigsCommand", "reverseOligo");
- exit(1);
- }
-}
//**********************************************************************************************************************
vector<int> MakeContigsCommand::convertQual(string qual) {
try {
vector<int> qualScores;
bool negativeScores = false;
- for (int i = 0; i < qual.length(); i++) {
+ for (int i = 0; i < qual.length(); i++) {
int temp = 0;
temp = int(qual[i]);
#include "blastalign.hpp"
#include "noalign.hpp"
#include "trimoligos.h"
+#include "oligos.h"
struct fastqRead {
vector<int> scores;
void help() { m->mothurOut(getHelpString()); }
private:
- bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk;
- string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format;
+ bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk, reorient;
+ string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format, inputDir;
float match, misMatch, gapOpen, gapExtend;
- int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
+ int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers;
vector<string> outputNames;
-
- map<int, oligosPair> barcodes;
- map<int, oligosPair> primers;
- vector<string> linker;
- vector<string> spacer;
- vector<string> primerNameVector;
- vector<string> barcodeNameVector;
+ Oligos* oligos;
vector<char> convertTable;
map<string, int> groupCounts;
//bool checkReads(fastqRead&, fastqRead&, string, string);
int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >, int);
int driver(vector<string>, string, string, string, vector<vector<string> >, int, string);
- bool getOligos(vector<vector<string> >&, string);
- string reverseOligo(string);
+ bool getOligos(vector<vector<string> >&, string, map<string, string>&);
vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool);
vector<pairFastqRead> mergeReads(vector<pairFastqRead> frReads, vector<pairFastqRead> friReads, map<string, pairFastqRead>& pairUniques);
};
string outputFasta;
string outputScrapFasta;
string outputMisMatches;
- string align, group;
+ string align, group, oligosfile;
vector<string> files;
vector<vector<string> > fastaFileNames;
MothurOut* m;
float match, misMatch, gapOpen, gapExtend;
int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
- bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap;
+ bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap, reorient;
map<string, int> groupCounts;
map<string, string> groupMap;
- vector<string> primerNameVector;
- vector<string> barcodeNameVector;
- map<int, oligosPair> barcodes;
- map<int, oligosPair> primers;
+
contigsData(){}
- contigsData(string g, vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) {
+ contigsData(string g, vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, vector<vector<string> > ffn, string olig, bool ro, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) {
files = f;
outputFasta = of;
outputMisMatches = om;
count = 0;
outputScrapFasta = osf;
fastaFileNames = ffn;
- barcodes = br;
- primers = pr;
- barcodeNameVector = bnv;
- primerNameVector = pnv;
+ oligosfile = olig;
pdiffs = pdf;
bdiffs = bdf;
tdiffs = tdf;
createFileGroup = cfg;
threadID = tid;
deltaq = delt;
+ reorient = ro;
done=false;
}
};
outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";
- TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes);
+ Oligos oligos;
+ if (pDataArray->oligosfile != "") { oligos.read(pDataArray->oligosfile); }
+ int numFPrimers = oligos.getPairedPrimers().size();
+ int numBarcodes = oligos.getPairedBarcodes().size();
+
+
+ TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes());
+ TrimOligos* rtrimOligos = NULL;
+ if (pDataArray->reorient) {
+ rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
+ }
while ((!inFFasta.eof()) && (!inRFasta.eof())) {
int barcodeIndex = 0;
int primerIndex = 0;
+ Sequence savedFSeq(fSeq.getName(), fSeq.getAligned()); Sequence savedRSeq(rSeq.getName(), rSeq.getAligned());
+ Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned());
+ QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL;
+ if (thisfqualfile != "") {
+ savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores());
+ savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores());
+ }
- if(pDataArray->barcodes.size() != 0){
+ if(numBarcodes != 0){
if (thisfqualfile != "") {
if ((thisfindexfile != "") || (thisrindexfile != "")) {
success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
else{ currentSeqsDiffs += success; }
}
- if(pDataArray->primers.size() != 0){
+ if(numFPrimers != 0){
if (thisfqualfile != "") {
success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
}else {
if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; }
+ if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
+ int thisSuccess = 0;
+ string thisTrashCode = "";
+ int thisCurrentSeqsDiffs = 0;
+
+ int thisBarcodeIndex = 0;
+ int thisPrimerIndex = 0;
+
+ if(numBarcodes != 0){
+ if (thisfqualfile != "") {
+ if ((thisfindexfile != "") || (thisrindexfile != "")) {
+ thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex);
+ }else {
+ thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex);
+ }
+ }else {
+ thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex);
+ }
+ if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if(numFPrimers != 0){
+ if (thisfqualfile != "") {
+ thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex);
+ }else {
+ thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex);
+ }
+ if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if (thisCurrentSeqsDiffs > pDataArray->tdiffs) { thisTrashCode += 't'; }
+
+ if (thisTrashCode == "") {
+ trashCode = thisTrashCode;
+ success = thisSuccess;
+ currentSeqsDiffs = thisCurrentSeqsDiffs;
+ barcodeIndex = thisBarcodeIndex;
+ primerIndex = thisPrimerIndex;
+ savedFSeq.reverseComplement();
+ savedRSeq.reverseComplement();
+ fSeq.setAligned(savedFSeq.getAligned());
+ rSeq.setAligned(savedRSeq.getAligned());
+ if(thisfqualfile != ""){
+ savedFQual->flipQScores(); savedRQual->flipQScores();
+ fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores());
+ }
+ }else { trashCode += "(" + thisTrashCode + ")"; }
+ }
+
//flip the reverse reads
rSeq.reverseComplement();
if (thisfqualfile != "") { rQual->flipQScores(); }
if (thisfqualfile != "") {
scores1 = fQual->getQualityScores();
scores2 = rQual->getQualityScores();
- delete fQual; delete rQual;
+ delete fQual; delete rQual; delete savedFQual; delete savedRQual;
}
int overlapStart = fSeq.getStartPos();
if(trashCode.length() == 0){
bool ignore = false;
if (pDataArray->createOligosGroup) {
- if(pDataArray->barcodes.size() != 0){
- string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
- if (pDataArray->primers.size() != 0) {
- if (pDataArray->primerNameVector[primerIndex] != "") {
- if(thisGroup != "") {
- thisGroup += "." + pDataArray->primerNameVector[primerIndex];
- }else {
- thisGroup = pDataArray->primerNameVector[primerIndex];
- }
- }
- }
-
- if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); }
-
- int pos = thisGroup.find("ignore");
- if (pos == string::npos) {
- pDataArray->groupMap[fSeq.getName()] = thisGroup;
+ string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);
+ if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); }
+
+ int pos = thisGroup.find("ignore");
+ if (pos == string::npos) {
+ pDataArray->groupMap[fSeq.getName()] = thisGroup;
- map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
- if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; }
- else { pDataArray->groupCounts[it->first] ++; }
- }else { ignore = true; }
- }
+ map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
+ if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; }
+ else { pDataArray->groupCounts[it->first] ++; }
+ }else { ignore = true; }
}else if (pDataArray->createFileGroup) {
int pos = pDataArray->group.find("ignore");
if (pos == string::npos) {
inRQual.close();
}
delete alignment;
+ if (pDataArray->reorient) { delete rtrimOligos; }
pDataArray->done = true;
if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputFasta); pDataArray->m->mothurRemove(pDataArray->outputMisMatches); pDataArray->m->mothurRemove(pDataArray->outputScrapFasta); }
int end;
};
+/************************************************************/
+struct oligosPair {
+ string forward;
+ string reverse;
+
+ oligosPair() { forward = ""; reverse = ""; }
+ oligosPair(string f, string r) : forward(f), reverse(r) {}
+ ~oligosPair() {}
+};
+
/************************************************************/
struct seqPriorityNode {
int numIdentical;
exit(1);
}
}
+
/**************************************************************************************************/
bool isSubset(vector<string>, vector<string>); //bigSet, subset
int checkName(string&);
map<string, vector<string> > parseClasses(string);
+
//math operation
double max(vector<double>&); //returns largest value in vector
--- /dev/null
+//
+// oligos.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 4/4/14.
+// Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#include "oligos.h"
+
+/**************************************************************************************************/
+
+Oligos::Oligos(string o){
+ try {
+ m = MothurOut::getInstance();
+ hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true;
+ indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0;
+ oligosfile = o;
+ readOligos();
+ if (pairedOligos) {
+ numBarcodes = pairedBarcodes.size();
+ numFPrimers = pairedPrimers.size();
+ }else {
+ numBarcodes = barcodes.size();
+ numFPrimers = primers.size();
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "Oligos");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+Oligos::Oligos(){
+ try {
+ m = MothurOut::getInstance();
+ hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true;
+ indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0;
+ numFPrimers = 0; numBarcodes = 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "Oligos");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int Oligos::read(string o){
+ try {
+ oligosfile = o;
+ readOligos();
+ if (pairedOligos) {
+ numBarcodes = pairedBarcodes.size();
+ numFPrimers = pairedPrimers.size();
+ }else {
+ numBarcodes = barcodes.size();
+ numFPrimers = primers.size();
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "read");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int Oligos::read(string o, bool reverse){
+ try {
+ oligosfile = o;
+ reversePairs = reverse;
+ readOligos();
+ if (pairedOligos) {
+ numBarcodes = pairedBarcodes.size();
+ numFPrimers = pairedPrimers.size();
+ }else {
+ numBarcodes = barcodes.size();
+ numFPrimers = primers.size();
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "read");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+
+int Oligos::readOligos(){
+ try {
+ ifstream inOligos;
+ m->openInputFile(oligosfile, inOligos);
+
+ string type, oligo, roligo, group;
+
+ while(!inOligos.eof()){
+
+ inOligos >> type;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+
+ if(type[0] == '#'){
+ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ m->gobble(inOligos);
+ }
+ else{
+ m->gobble(inOligos);
+ //make type case insensitive
+ for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
+
+ inOligos >> oligo;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+
+ for(int i=0;i<oligo.length();i++){
+ oligo[i] = toupper(oligo[i]);
+ if(oligo[i] == 'U') { oligo[i] = 'T'; }
+ }
+
+ if(type == "FORWARD"){
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ //check for repeat barcodes
+ map<string, int>::iterator itPrime = primers.find(oligo);
+ if (itPrime != primers.end()) { m->mothurOut("[WARNING]: primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
+
+ primers[oligo]=indexPrimer; indexPrimer++;
+ primerNameVector.push_back(group);
+ }
+ else if (type == "PRIMER"){
+ m->gobble(inOligos);
+
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ if (reversePairs) { roligo = reverseOligo(roligo); }
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ oligosPair newPrimer(oligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+
+ //check for repeat barcodes
+ string tempPair = oligo+roligo;
+ if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
+ else { uniquePrimers.insert(tempPair); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
+
+ pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+ primerNameVector.push_back(group);
+ hasPPrimers = true;
+ }
+ else if(type == "REVERSE"){
+ string oligoRC = reverseOligo(oligo);
+ revPrimer.push_back(oligoRC);
+ }
+ else if(type == "BARCODE"){
+ inOligos >> group;
+
+ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
+ //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
+
+ string temp = "";
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { temp += c; }
+ }
+
+ //then this is illumina data with 4 columns
+ if (temp != "") {
+ hasPBarcodes = true;
+ string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+ group = temp;
+
+ for(int i=0;i<reverseBarcode.length();i++){
+ reverseBarcode[i] = toupper(reverseBarcode[i]);
+ if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
+ }
+
+ if (reversePairs) { reverseBarcode = reverseOligo(reverseBarcode); }
+ oligosPair newPair(oligo, reverseBarcode);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+
+ //check for repeat barcodes
+ string tempPair = oligo+reverseBarcode;
+ if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
+ else { uniqueBarcodes.insert(tempPair); }
+
+ pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+ barcodeNameVector.push_back(group);
+ }else {
+ //check for repeat barcodes
+ map<string, int>::iterator itBar = barcodes.find(oligo);
+ if (itBar != barcodes.end()) { m->mothurOut("[WARNING]: barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ barcodes[oligo]=indexBarcode; indexBarcode++;
+ barcodeNameVector.push_back(group);
+ }
+ }else if(type == "LINKER"){
+ linker.push_back(oligo);
+ }else if(type == "SPACER"){
+ spacer.push_back(oligo);
+ }
+ else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+ }
+ m->gobble(inOligos);
+ }
+ inOligos.close();
+
+ if (hasPBarcodes || hasPPrimers) {
+ pairedOligos = true;
+ if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
+ }
+
+
+ //add in potential combos
+ if(barcodeNameVector.size() == 0){
+ if (pairedOligos) {
+ oligosPair newPair("", "");
+ pairedBarcodes[0] = newPair;
+ }else {
+ barcodes[""] = 0;
+ }
+ barcodeNameVector.push_back("");
+ }
+
+ if(primerNameVector.size() == 0){
+ if (pairedOligos) {
+ oligosPair newPair("", "");
+ pairedPrimers[0] = newPair;
+ }else {
+ primers[""] = 0;
+ }
+ primerNameVector.push_back("");
+ }
+
+
+ if (pairedOligos) {
+ for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+
+ string primerName = primerNameVector[itPrimer->first];
+ string barcodeName = barcodeNameVector[itBar->first];
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primerName = " + primerName + " barcodeName = " + barcodeName + "\n"); }
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { if (m->debug) { m->mothurOut("[DEBUG]: in ignore. \n"); } } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { if (m->debug) { m->mothurOut("[DEBUG]: in blank. \n"); } } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastqFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->first];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->first];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+ }
+ }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: comboGroupName = " + comboGroupName + "\n"); }
+
+ uniqueNames.insert(comboGroupName);
+
+ map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
+ if (itGroup2Barcode == Group2Barcode.end()) {
+ vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
+ Group2Barcode[comboGroupName] = tempBarcodes;
+ }else {
+ Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
+ }
+
+ itGroup2Barcode = Group2Primer.find(comboGroupName);
+ if (itGroup2Barcode == Group2Primer.end()) {
+ vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
+ Group2Primer[comboGroupName] = tempPrimers;
+ }else {
+ Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
+ }
+ }
+ }
+ }
+ }else {
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = primerNameVector[itPrimer->second];
+ string barcodeName = barcodeNameVector[itBar->second];
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastqFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->second];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->second];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ }
+ }
+ uniqueNames.insert(comboGroupName);
+
+ map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
+ if (itGroup2Barcode == Group2Barcode.end()) {
+ vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
+ Group2Barcode[comboGroupName] = tempBarcodes;
+ }else {
+ Group2Barcode[comboGroupName].push_back(itBar->first);
+ }
+
+ itGroup2Barcode = Group2Primer.find(comboGroupName);
+ if (itGroup2Barcode == Group2Primer.end()) {
+ vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
+ Group2Primer[comboGroupName] = tempPrimers;
+ }else {
+ Group2Primer[comboGroupName].push_back(itPrimer->first);
+ }
+ }
+ }
+ }
+ }
+
+
+ if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
+
+
+ Groups.clear();
+ for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "readOligos");
+ exit(1);
+ }
+}
+//********************************************************************/
+vector<string> Oligos::getBarcodes(string groupName){
+ try {
+ vector<string> thisGroupsBarcodes;
+
+ map<string, vector<string> >::iterator it = Group2Barcode.find(groupName);
+
+ if (it == Group2Barcode.end()) { m->mothurOut("[ERROR]: no barcodes found for group " + groupName + ".\n"); m->control_pressed=true;
+ }else { thisGroupsBarcodes = it->second; }
+
+ return thisGroupsBarcodes;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getBarcodes");
+ exit(1);
+ }
+}
+//********************************************************************/
+vector<string> Oligos::getPrimers(string groupName){
+ try {
+ vector<string> thisGroupsPrimers;
+
+ map<string, vector<string> >::iterator it = Group2Primer.find(groupName);
+
+ if (it == Group2Primer.end()) { m->mothurOut("[ERROR]: no primers found for group " + groupName + ".\n"); m->control_pressed=true;
+ }else { thisGroupsPrimers = it->second; }
+
+ return thisGroupsPrimers;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getPrimers");
+ exit(1);
+ }
+}
+//********************************************************************/
+//can't have paired and unpaired so this function will either run the paired map or the unpaired
+map<int, oligosPair> Oligos::getReorientedPairedPrimers(){
+ try {
+ map<int, oligosPair> rpairedPrimers;
+
+ for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
+ string forward = (it->second).reverse;
+ if (reversePairs) { forward = reverseOligo(forward); }
+ string reverse = (it->second).forward;
+ if (reversePairs) { reverse = reverseOligo(reverse); }
+ oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer
+ rpairedPrimers[it->first] = tempPair;
+ }
+
+
+ for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
+ oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+ rpairedPrimers[it->second] = tempPair;
+ }
+
+ return rpairedPrimers;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getReorientedPairedPrimers");
+ exit(1);
+ }
+}
+//********************************************************************/
+//can't have paired and unpaired so this function will either run the paired map or the unpaired
+map<int, oligosPair> Oligos::getReorientedPairedBarcodes(){
+ try {
+ map<int, oligosPair> rpairedBarcodes;
+
+ for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
+ string forward = (it->second).reverse;
+ if (reversePairs) { forward = reverseOligo(forward); }
+ string reverse = (it->second).forward;
+ if (reversePairs) { reverse = reverseOligo(reverse); }
+ oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer
+ rpairedBarcodes[it->first] = tempPair;
+ }
+
+ for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
+ oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+ rpairedBarcodes[it->second] = tempPair;
+ }
+
+ return rpairedBarcodes;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getReorientedPairedBarcodes");
+ exit(1);
+ }
+}
+
+//********************************************************************/
+string Oligos::reverseOligo(string oligo){
+ try {
+ string reverse = "";
+
+ for(int i=oligo.length()-1;i>=0;i--){
+
+ if(oligo[i] == 'A') { reverse += 'T'; }
+ else if(oligo[i] == 'T'){ reverse += 'A'; }
+ else if(oligo[i] == 'U'){ reverse += 'A'; }
+
+ else if(oligo[i] == 'G'){ reverse += 'C'; }
+ else if(oligo[i] == 'C'){ reverse += 'G'; }
+
+ else if(oligo[i] == 'R'){ reverse += 'Y'; }
+ else if(oligo[i] == 'Y'){ reverse += 'R'; }
+
+ else if(oligo[i] == 'M'){ reverse += 'K'; }
+ else if(oligo[i] == 'K'){ reverse += 'M'; }
+
+ else if(oligo[i] == 'W'){ reverse += 'W'; }
+ else if(oligo[i] == 'S'){ reverse += 'S'; }
+
+ else if(oligo[i] == 'B'){ reverse += 'V'; }
+ else if(oligo[i] == 'V'){ reverse += 'B'; }
+
+ else if(oligo[i] == 'D'){ reverse += 'H'; }
+ else if(oligo[i] == 'H'){ reverse += 'D'; }
+
+ else { reverse += 'N'; }
+ }
+
+
+ return reverse;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "reverseOligo");
+ exit(1);
+ }
+}
+//********************************************************************/
+string Oligos::getBarcodeName(int index){
+ try {
+ string name = "";
+
+ if ((index >= 0) && (index < barcodeNameVector.size())) { name = barcodeNameVector[index]; }
+
+ return name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getBarcodeName");
+ exit(1);
+ }
+}
+//********************************************************************/
+string Oligos::getPrimerName(int index){
+ try {
+ string name = "";
+
+ if ((index >= 0) && (index < primerNameVector.size())) { name = primerNameVector[index]; }
+
+ return name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getPrimerName");
+ exit(1);
+ }
+}
+//********************************************************************/
+string Oligos::getGroupName(int barcodeIndex, int primerIndex){
+ try {
+
+ string thisGroup = "";
+ if(numBarcodes != 0){
+ thisGroup = getBarcodeName(barcodeIndex);
+ if (numFPrimers != 0) {
+ if (getPrimerName(primerIndex) != "") {
+ if(thisGroup != "") {
+ thisGroup += "." + getPrimerName(primerIndex);
+ }else {
+ thisGroup = getPrimerName(primerIndex);
+ }
+ }
+ }
+ }
+
+ return thisGroup;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Oligos", "getGroupName");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
--- /dev/null
+//
+// oligos.h
+// Mothur
+//
+// Created by Sarah Westcott on 4/4/14.
+// Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_oligos_h
+#define Mothur_oligos_h
+
+
+#include "mothurout.h"
+
+
+/**************************************************************************************************/
+
+class Oligos {
+
+public:
+ Oligos(string);
+ Oligos();
+ ~Oligos() {}
+
+ int read(string);
+ int read(string, bool); //read without reversing the paired barcodes, for make.contigs.
+ bool hasPairedPrimers() { return hasPPrimers; }
+ bool hasPairedBarcodes() { return hasPBarcodes; }
+
+ //for processing with trimOligos class
+ map<int, oligosPair> getPairedPrimers() { return pairedPrimers; }
+ map<int, oligosPair> getPairedBarcodes() { return pairedBarcodes; }
+ map<string, int> getPrimers() { return primers; }
+ map<string, int> getBarcodes() { return barcodes; }
+
+ map<int, oligosPair> getReorientedPairedPrimers();
+ map<int, oligosPair> getReorientedPairedBarcodes();
+ map<string, int> getReorientedPrimers();
+ map<string, int> getReorientedBarcodes();
+
+
+ vector<string> getLinkers() { return linker; }
+ vector<string> getSpacers() { return spacer; }
+ vector<string> getReversePrimers() { return revPrimer; }
+ vector<string> getPrimerNames() { return primerNameVector; }
+ vector<string> getBarcodeNames() { return barcodeNameVector; }
+ vector<string> getGroupNames() { return Groups; }
+
+
+ //for printing and other formatting uses
+ vector<string> getBarcodes(string); //get barcodes for a group. For paired barcodes will return forward.reverse
+ vector<string> getPrimers(string); //get primers for a group. For paired primers will return forward.reverse
+ string getBarcodeName(int);
+ string getPrimerName(int);
+ string getGroupName(int, int);
+
+
+private:
+
+ set<string> uniqueNames;
+ vector<string> Groups;
+ vector<string> revPrimer;
+ map<string, vector<string> > Group2Barcode;
+ map<string, vector<string> > Group2Primer;
+ map<int, oligosPair> pairedBarcodes;
+ map<int, oligosPair> pairedPrimers;
+ map<string, int> primers;
+ map<string, int> barcodes;
+ vector<string> linker;
+ vector<string> spacer;
+ vector<string> primerNameVector;
+ vector<string> barcodeNameVector;
+ bool hasPPrimers, hasPBarcodes, pairedOligos, reversePairs;
+ string oligosfile;
+ int numBarcodes, numFPrimers;
+ MothurOut* m;
+
+ int indexPrimer;
+ int indexBarcode;
+ int indexPairedPrimer;
+ int indexPairedBarcode;
+ set<string> uniquePrimers;
+ set<string> uniqueBarcodes;
+
+ int readOligos();
+ string reverseOligo(string);
+};
+
+/**************************************************************************************************/
+
+
+#endif
CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
}
if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
+
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
+ reorient = m->isTrue(temp);
}
}
if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); }
- TrimOligos* trimOligos = NULL;
- int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
+ TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;
+ pairedOligos = false; numBarcodes = 0; numPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
if (oligosfile != "") {
readOligos(oligosfile);
- numPrimers = primers.size(); numBarcodes = barcodes.size();
//find group read belongs to
- if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
- else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
+ if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); numBarcodes = oligos.getPairedBarcodes().size(); numPrimers = oligos.getPairedPrimers().size(); }
+ else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size(); }
+
+ if (reorient) {
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
+ }
}
else if (groupfile != "") { readGroup(groupfile); }
if (split > 1) {
int barcodeIndex, primerIndex, trashCodeLength;
- if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers); }
+ if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, rtrimOligos, numBarcodes, numPrimers); }
else if (groupfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode"); }
else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
if (split > 1) {
if (groupfile != "") { delete groupMap; }
- else if (oligosfile != "") { delete trimOligos; }
+ else if (oligosfile != "") { delete trimOligos; if (reorient) { delete rtrimOligos; } }
map<string, string>::iterator it;
set<string> namesToRemove;
}
}
//**********************************************************************************************************************
-int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) {
try {
int success = 1;
string trashCode = "";
Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
- if(linker.size() != 0){
+ //for reorient
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
+ QualityScores savedQual(currQual.getName(), currQual.getScores());
+
+ if(numLinkers != 0){
success = trimOligos->stripLinker(currSeq, currQual);
if(success > ldiffs) { trashCode += 'k'; }
else{ currentSeqsDiffs += success; }
else{ currentSeqsDiffs += success; }
}
- if(spacer.size() != 0){
+ if(numSpacers != 0){
success = trimOligos->stripSpacer(currSeq, currQual);
if(success > sdiffs) { trashCode += 's'; }
else{ currentSeqsDiffs += success; }
if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
- if(revPrimer.size() != 0){
+ if(numRPrimers != 0){
success = trimOligos->stripReverse(currSeq, currQual);
if(!success) { trashCode += 'r'; }
}
- if (trashCode.length() == 0) { //is this sequence in the ignore group
- string thisGroup = "";
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+ int thisSuccess = 0;
+ string thisTrashCode = "";
+ int thisCurrentSeqsDiffs = 0;
- if(barcodes.size() != 0){
- thisGroup = barcodeNameVector[barcode];
- if (numPrimers != 0) {
- if (primerNameVector[primer] != "") {
- if(thisGroup != "") {
- thisGroup += "." + primerNameVector[primer];
- }else {
- thisGroup = primerNameVector[primer];
- }
- }
- }
+ int thisBarcodeIndex = 0;
+ int thisPrimerIndex = 0;
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+ if(numBarcodes != 0){
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
+ if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+ if(numPrimers != 0){
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);
+ if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
}
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
+
+ if (thisTrashCode == "") {
+ trashCode = thisTrashCode;
+ success = thisSuccess;
+ currentSeqsDiffs = thisCurrentSeqsDiffs;
+ barcode = thisBarcodeIndex;
+ primer = thisPrimerIndex;
+ savedSeq.reverseComplement();
+ currSeq.setAligned(savedSeq.getAligned());
+ savedQual.flipQScores();
+ currQual.setScores(savedQual.getScores());
+ }else { trashCode += "(" + thisTrashCode + ")"; }
+ }
+
+ if (trashCode.length() == 0) { //is this sequence in the ignore group
+ string thisGroup = oligos.getGroupName(barcode, primer);
+
int pos = thisGroup.find("ignore");
if (pos != string::npos) { trashCode += "i"; }
}
string group = groupMap->getGroup(thisRead.seq.getName());
if (group == "not found") { trashCode += "g"; } //scrap for group
- else { //find file group
- map<string, int>::iterator it = barcodes.find(group);
- if (it != barcodes.end()) {
- barcode = it->second;
- }else { trashCode += "g"; }
- }
-
+
return trashCode.length();
}
catch(exception& e) {
bool ParseFastaQCommand::readOligos(string oligoFile){
try {
- ifstream inOligos;
- m->openInputFile(oligoFile, inOligos);
-
- string type, oligo, roligo, group;
- bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
-
- int indexPrimer = 0;
- int indexBarcode = 0;
- int indexPairedPrimer = 0;
- int indexPairedBarcode = 0;
- set<string> uniquePrimers;
- set<string> uniqueBarcodes;
-
- while(!inOligos.eof()){
-
- inOligos >> type;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-
- if(type[0] == '#'){
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(inOligos);
- }
- else{
- m->gobble(inOligos);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- //check for repeat barcodes
- map<string, int>::iterator itPrime = primers.find(oligo);
- if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
-
- primers[oligo]=indexPrimer; indexPrimer++;
- primerNameVector.push_back(group);
- }
- else if (type == "PRIMER"){
- m->gobble(inOligos);
-
- inOligos >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
- roligo = reverseOligo(roligo);
-
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- oligosPair newPrimer(oligo, roligo);
-
- if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
-
- //check for repeat barcodes
- string tempPair = oligo+roligo;
- if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
- else { uniquePrimers.insert(tempPair); }
-
- if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
-
- pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
- primerNameVector.push_back(group);
- hasPrimer = true;
- }
- else if(type == "REVERSE"){
- //Sequence oligoRC("reverse", oligo);
- //oligoRC.reverseComplement();
- string oligoRC = reverseOligo(oligo);
- revPrimer.push_back(oligoRC);
- }
- else if(type == "BARCODE"){
- inOligos >> group;
-
- //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
- //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
-
- string temp = "";
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { temp += c; }
- }
-
- //then this is illumina data with 4 columns
- if (temp != "") {
- hasPairedBarcodes = true;
- string reverseBarcode = group; //reverseOligo(group); //reverse barcode
- group = temp;
-
- for(int i=0;i<reverseBarcode.length();i++){
- reverseBarcode[i] = toupper(reverseBarcode[i]);
- if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
- }
-
- reverseBarcode = reverseOligo(reverseBarcode);
- oligosPair newPair(oligo, reverseBarcode);
-
- if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
- //check for repeat barcodes
- string tempPair = oligo+reverseBarcode;
- if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
- else { uniqueBarcodes.insert(tempPair); }
-
- pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
- barcodeNameVector.push_back(group);
- }else {
- //check for repeat barcodes
- map<string, int>::iterator itBar = barcodes.find(oligo);
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- barcodes[oligo]=indexBarcode; indexBarcode++;
- barcodeNameVector.push_back(group);
- }
- }else if(type == "LINKER"){
- linker.push_back(oligo);
- }else if(type == "SPACER"){
- spacer.push_back(oligo);
- }
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- if (hasPairedBarcodes || hasPrimer) {
+ bool allBlank = false;
+ oligos.read(oligosfile);
+
+ if (m->control_pressed) { return false; } //error in reading oligos
+
+ if (oligos.hasPairedBarcodes()) {
pairedOligos = true;
- if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
+ numPrimers = oligos.getPairedPrimers().size();
+ numBarcodes = oligos.getPairedBarcodes().size();
+ }else {
+ pairedOligos = false;
+ numPrimers = oligos.getPrimers().size();
+ numBarcodes = oligos.getBarcodes().size();
}
- //add in potential combos
- if(barcodeNameVector.size() == 0){
- barcodes[""] = 0;
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- primers[""] = 0;
- primerNameVector.push_back("");
- }
-
- fastqFileNames.resize(barcodeNameVector.size());
+ numLinkers = oligos.getLinkers().size();
+ numSpacers = oligos.getSpacers().size();
+ numRPrimers = oligos.getReversePrimers().size();
+
+ vector<string> groupNames = oligos.getGroupNames();
+ if (groupNames.size() == 0) { allBlank = true; }
+
+
+ fastqFileNames.resize(oligos.getBarcodeNames().size());
for(int i=0;i<fastqFileNames.size();i++){
- fastqFileNames[i].assign(primerNameVector.size(), "");
+ for(int j=0;j<oligos.getPrimerNames().size();j++){ fastqFileNames[i].push_back(""); }
}
-
-
- set<string> uniqueNames; //used to cleanup outputFileNames
- if (pairedOligos) {
- for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
- for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
-
- string primerName = primerNameVector[itPrimer->first];
- string barcodeName = barcodeNameVector[itBar->first];
+
+ set<string> uniqueNames; //used to cleanup outputFileNames
+ if (pairedOligos) {
+ map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+ map<int, oligosPair> primers = oligos.getPairedPrimers();
+ for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = oligos.getPrimerName(itPrimer->first);
+ string barcodeName = oligos.getBarcodeName(itBar->first);
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else {
- string comboGroupName = "";
- string fastqFileName = "";
-
- if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->first];
+ if(primerName == ""){
+ comboGroupName = barcodeName;
+ }else{
+ if(barcodeName == ""){
+ comboGroupName = primerName;
}
else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->first];
- }
- else{
- comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
- }
+ comboGroupName = barcodeName + "." + primerName;
}
-
-
- ofstream temp;
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
- variables["[group]"] = comboGroupName;
- fastqFileName = getOutputFileName("fastq", variables);
- if (uniqueNames.count(fastqFileName) == 0) {
- outputNames.push_back(fastqFileName);
- outputTypes["fastq"].push_back(fastqFileName);
- uniqueNames.insert(fastqFileName);
- }
-
- fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
- m->openOutputFile(fastqFileName, temp); temp.close();
-
}
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = comboGroupName;
+ string fastqFileName = getOutputFileName("fastq", variables);
+ if (uniqueNames.count(fastqFileName) == 0) {
+ outputNames.push_back(fastqFileName);
+ outputTypes["fastq"].push_back(fastqFileName);
+ uniqueNames.insert(fastqFileName);
+ }
+
+ fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
+ m->openOutputFile(fastqFileName, temp); temp.close();
}
}
- }else {
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-
- string primerName = primerNameVector[itPrimer->second];
- string barcodeName = barcodeNameVector[itBar->second];
+ }
+ }else {
+ map<string, int> barcodes = oligos.getBarcodes() ;
+ map<string, int> primers = oligos.getPrimers();
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = oligos.getPrimerName(itPrimer->second);
+ string barcodeName = oligos.getBarcodeName(itBar->second);
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else {
- string comboGroupName = "";
- string fastqFileName = "";
-
- if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->second];
+ if(primerName == ""){
+ comboGroupName = barcodeName;
+ }else{
+ if(barcodeName == ""){
+ comboGroupName = primerName;
}
else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->second];
- }
- else{
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
- }
+ comboGroupName = barcodeName + "." + primerName;
}
-
-
- ofstream temp;
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
- variables["[group]"] = comboGroupName;
- fastqFileName = getOutputFileName("fastq", variables);
- if (uniqueNames.count(fastqFileName) == 0) {
- outputNames.push_back(fastqFileName);
- outputTypes["fastq"].push_back(fastqFileName);
- uniqueNames.insert(fastqFileName);
- }
-
- fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
- m->openOutputFile(fastqFileName, temp); temp.close();
-
}
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = comboGroupName;
+ string fastqFileName = getOutputFileName("fastq", variables);
+ if (uniqueNames.count(fastqFileName) == 0) {
+ outputNames.push_back(fastqFileName);
+ outputTypes["fastq"].push_back(fastqFileName);
+ uniqueNames.insert(fastqFileName);
+ }
+
+ fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
+ m->openOutputFile(fastqFileName, temp); temp.close();
}
}
}
-
+ }
+
+ if (allBlank) {
+ m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
+ return false;
+ }
+
ofstream temp;
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
variables["[group]"] = "scrap";
noMatchFile = getOutputFileName("fastq", variables);
m->openOutputFile(noMatchFile, temp); temp.close();
-
+
return true;
}
ofstream temp;
m->openOutputFileBinary(thisFilename, temp); temp.close();
fastqFileNames[i].push_back(thisFilename);
- barcodes[groups[i]] = i;
}
}
exit(1);
}
}
-//********************************************************************/
-string ParseFastaQCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
-
- else { reverse += 'N'; }
- }
-
-
- return reverse;
- }
- catch(exception& e) {
- m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
- exit(1);
- }
-}
-
-
//**********************************************************************************************************************
#include "trimoligos.h"
#include "sequence.hpp"
#include "groupmap.h"
+#include "oligos.h"
struct fastqRead2 {
string quality;
vector<string> outputNames;
string outputDir, fastaQFile, format, oligosfile, groupfile;
- bool abort, fasta, qual, pacbio, pairedOligos;
- int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split;
+ bool abort, fasta, qual, pacbio, pairedOligos, reorient;
+ int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split, numBarcodes, numPrimers, numLinkers, numSpacers, numRPrimers;
GroupMap* groupMap;
+ Oligos oligos;
- //oligos file data structures
- vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
- map<string, int> barcodes;
- map<string, int> primers;
- map<int, oligosPair> pairedBarcodes;
- map<int, oligosPair> pairedPrimers;
vector<vector<string> > fastqFileNames;
string noMatchFile;
vector<char> convertTable;
bool readOligos(string oligosFile);
bool readGroup(string oligosFile);
- string reverseOligo(string oligo);
fastqRead2 readFastq(ifstream&, bool&);
- int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int);
+ int findGroup(fastqRead2, int&, int&, TrimOligos*&, TrimOligos*&, int, int);
int findGroup(fastqRead2, int&, int&, string);
};
#include "alignment.hpp"
#include "needlemanoverlap.hpp"
#include "counttable.h"
+#include "oligos.h"
class PcrSeqsCommand : public Command {
public:
};
vector<linePair> lines;
- bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
- bool abort, keepprimer, keepdots, fileAligned;
+ bool abort, keepprimer, keepdots, fileAligned, pairedOligos;
string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
- int start, end, processors, length, pdiffs;
+ int start, end, processors, length, pdiffs, numFPrimers, numRPrimers;
+ Oligos oligos;
- vector<string> revPrimer, outputNames;
- map<string, int> primers;
+ vector<string> outputNames;
int writeAccnos(set<string>);
int readName(set<string>&);
int readGroup(set<string>);
int readTax(set<string>);
int readCount(set<string>);
- bool readOligos();
+ int readOligos();
bool readEcoli();
int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
int createProcesses(string, string, string, set<string>&);
bool isAligned(string, map<int, int>&);
- string reverseOligo(string);
int adjustDots(string, string, int, int);
};
unsigned long long fend;
int count, start, end, length, pdiffs, pstart, pend;
MothurOut* m;
- map<string, int> primers;
- vector<string> revPrimer;
set<string> badSeqNames;
bool keepprimer, keepdots, fileAligned, adjustNeeded;
-
pcrData(){}
- pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
+ pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
filename = f;
goodFasta = gf;
badFasta = bfn;
m = mout;
oligosfile = ol;
ecolifile = ec;
- primers = pr;
- revPrimer = rpr;
nomatch = nm;
keepprimer = kp;
keepdots = kd;
map<string, int> faked;
set<int> locations; //locations = beginning locations
- TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
+ Oligos oligos(pDataArray->oligosfile);
+ int numFPrimers, numRPrimers;
+ map<string, int> primers;
+ map<string, int> barcodes; //not used
+ vector<string> revPrimer;
+ if (oligos.hasPairedBarcodes()) {
+ numFPrimers = oligos.getPairedPrimers().size();
+ map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
+ for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
+ primers[(it->second).forward] = it->first;
+ revPrimer.push_back((it->second).reverse);
+ }
+ }else {
+ numFPrimers = oligos.getPrimers().size();
+ primers = oligos.getPrimers();
+ revPrimer = oligos.getReversePrimers();
+ }
+ numRPrimers = oligos.getReversePrimers().size();
+
+ TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer);
for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
pDataArray->count++;
///////////////////////////////////////////////////////////////
//process primers
- if (pDataArray->primers.size() != 0) {
+ if (numFPrimers != 0) {
int primerStart = 0; int primerEnd = 0;
bool good = trim.findForward(currSeq, primerStart, primerEnd);
}
//process reverse primers
- if (pDataArray->revPrimer.size() != 0) {
+ if (numRPrimers != 0) {
int primerStart = 0; int primerEnd = 0;
bool good = trim.findReverse(currSeq, primerStart, primerEnd);
if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
- if (pDataArray->primers.size() != 0) { set<int>::iterator it = locations.begin(); pDataArray->pstart = *it; }
+ if (numFPrimers != 0) { set<int>::iterator it = locations.begin(); pDataArray->pstart = *it; }
}
return 0;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
- fileAligned = true;
+ fileAligned = true; pairedOligos = false;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
length = 0;
- if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } } if (m->control_pressed) { return 0; }
+ if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(numFPrimers) + ", revprimers = " + toString(numRPrimers) + ".\n"); } } if (m->control_pressed) { return 0; }
if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; }
vector<unsigned long long> positions;
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
string name = "";
int thisStart = -1; int thisEnd = -1;
- if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
- if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ if (numFPrimers != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (numRPrimers != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
else { pend = -1; break; }
int myDiff = 0;
set<int> locations; //locations[0] = beginning locations,
//pdiffs, bdiffs, primers, barcodes, revPrimers
- map<string, int> faked;
- TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+ map<string, int> primers;
+ map<string, int> barcodes; //not used
+ vector<string> revPrimer;
+ if (pairedOligos) {
+ map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
+ for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
+ primers[(it->second).forward] = it->first;
+ revPrimer.push_back((it->second).reverse);
+ }
+ if (pdiffs != 0) { m->mothurOut("[WARNING]: Pcr.seqs is only designed to allow diffs in forward primers. Reverse primers must be an exact match.\n"); }
+ }else{
+ primers = oligos.getPrimers();
+ revPrimer = oligos.getReversePrimers();
+ }
+
+ TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer);
while (!done) {
string name = "";
int thisStart = -1; int thisEnd = -1;
- if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
- if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ if (numFPrimers != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (numRPrimers != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
//cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
exit(1);
}
}
-//********************************************************************/
-string PcrSeqsCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
-
- else { reverse += 'N'; }
- }
-
-
- return reverse;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-bool PcrSeqsCommand::readOligos(){
- try {
- ifstream inOligos;
- m->openInputFile(oligosfile, inOligos);
-
- string type, oligo, group;
- int primerCount = 0;
-
- while(!inOligos.eof()){
-
- inOligos >> type;
-
- if(type[0] == '#'){ //ignore
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(inOligos);
- }else{
- m->gobble(inOligos);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- }
- primers[oligo] = primerCount; primerCount++;
- //cout << "for oligo = " << oligo << endl;
- }else if(type == "REVERSE"){
- string oligoRC = reverseOligo(oligo);
- revPrimer.push_back(oligoRC);
- //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
- }else if(type == "BARCODE"){
- inOligos >> group;
- }else if(type == "PRIMER"){
- m->gobble(inOligos);
- primers[oligo] = primerCount; primerCount++;
-
- string roligo="";
- inOligos >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
- revPrimer.push_back(reverseOligo(roligo));
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- }
- //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
- }else if((type == "LINKER")||(type == "SPACER")) {;}
- else{ m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- if ((primers.size() == 0) && (revPrimer.size() == 0)) {
- m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers. Please correct."); m->mothurOutEndLine();
- m->control_pressed = true;
- return false;
- }
-
- return true;
-
- }catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "readOligos");
- exit(1);
- }
-}
//***************************************************************************************************************
bool PcrSeqsCommand::readEcoli(){
try {
exit(1);
}
}
+//***************************************************************************************************************
+
+int PcrSeqsCommand::readOligos(){
+ try {
+ oligos.read(oligosfile);
+
+ if (m->control_pressed) { return false; } //error in reading oligos
+
+ if (oligos.hasPairedBarcodes()) {
+ pairedOligos = true;
+ numFPrimers = oligos.getPairedPrimers().size();
+ }else {
+ pairedOligos = false;
+ numFPrimers = oligos.getPrimers().size();
+ }
+ numRPrimers = oligos.getReversePrimers().size();
+
+ if (oligos.getLinkers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove linkers, ignoring.\n"); }
+ if (oligos.getSpacers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove spacers, ignoring.\n"); }
+
+ return true;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "readOligos");
+ exit(1);
+ }
+}
+
/**************************************************************************************/
exit(1);
}
}
+/**************************************************************************************************/
+QualityScores::QualityScores(string n, vector<int> s){
+ try {
+ m = MothurOut::getInstance();
+ setName(n);
+ setScores(s);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "QualityScores", "QualityScores");
+ exit(1);
+ }
+}
/**************************************************************************************************/
QualityScores::QualityScores(ifstream& qFile){
if (logTransform) { aveQScore += pow(10.0, qScores[i]); }
else { aveQScore += qScores[i]; }
}
- aveQScore /= (double) seqLength;
- if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); }
- else { aveQScore /= (double) seqLength; }
+ if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); }
+ else { aveQScore /= (double) seqLength; }
return aveQScore;
}
double aveQScore = calculateAverage(logTransform);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + sequence.getName() + " average = " + toString(aveQScore) + "\n"); }
+
if(aveQScore >= qAverage) { success = 1; }
else { success = 0; }
class QualityScores {
public:
QualityScores();
+ QualityScores(string n, vector<int> qs);
QualityScores(ifstream&);
string getName();
int getLength(){ return (int)qScores.size(); }
try { \r
CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);\r
CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
try {\r
string helpString = "";\r
helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
- helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+ helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n";\r
helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n";\r
helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n";\r
helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n";\r
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";\r
helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n";\r
helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n";\r
helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }\r
}\r
\r
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }\r
+ reorient = m->isTrue(temp);\r
\r
}\r
}\r
//**********************************************************************************************************************\r
int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
try {\r
+ oligosObject = new Oligos();\r
currentFileName = input;\r
if (outputDir == "") { outputDir += m->hasPath(input); }\r
\r
if (accnos != "") { readAccnosFile(accnos); }\r
else { seqNames.clear(); }\r
\r
- if (hasOligos) { readOligos(oligos); split = 2; }\r
+ TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;\r
+ if (hasOligos) {\r
+ readOligos(oligos); split = 2;\r
+ if (m->control_pressed) { delete oligosObject; return 0; }\r
+ trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers()); numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size();\r
+ if (reorient) {\r
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size();\r
+ }\r
+ }\r
if (hasGroup) { readGroup(oligos); split = 2; }\r
\r
ofstream outSfftxt, outFasta, outQual, outFlow;\r
int count = 0;\r
\r
//check magic number and version\r
- if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
- if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
+ if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; }\r
+ if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; }\r
\r
//print common header\r
if (sfftxt) { printCommonHeader(outSfftxt, header); }\r
\r
//read data\r
seqRead read; Header readheader;\r
- readSeqData(in, read, header.numFlowsPerRead, readheader);\r
+ readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos);\r
\r
bool okay = sanityCheck(readheader, read);\r
if (!okay) { break; }\r
else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
}\r
\r
+ delete oligosObject;\r
+ if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } }\r
+ \r
return count;\r
}\r
catch(exception& e) {\r
}\r
}\r
//**********************************************************************************************************************\r
-bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
+bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){\r
try {\r
unsigned long long startSpotInFile = in.tellg();\r
if (!in.eof()) {\r
\r
int barcodeIndex, primerIndex, trashCodeLength;\r
\r
- if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex); }\r
+ if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos); }\r
else if (hasGroup) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode"); }\r
else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
\r
}\r
}\r
//**********************************************************************************************************************\r
-int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) {\r
try {\r
- //find group read belongs to\r
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
\r
int success = 1;\r
string trashCode = "";\r
Sequence currSeq(header.name, seq);\r
QualityScores currQual;\r
\r
+ //for reorient\r
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());\r
+ QualityScores savedQual(currQual.getName(), currQual.getScores());\r
+ \r
if(numLinkers != 0){\r
- success = trimOligos.stripLinker(currSeq, currQual);\r
+ success = trimOligos->stripLinker(currSeq, currQual);\r
if(success > ldiffs) { trashCode += 'k'; }\r
else{ currentSeqsDiffs += success; }\r
\r
}\r
\r
- if(barcodes.size() != 0){\r
- success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
+ if(numBarcodes != 0){\r
+ success = trimOligos->stripBarcode(currSeq, currQual, barcode);\r
if(success > bdiffs) { trashCode += 'b'; }\r
else{ currentSeqsDiffs += success; }\r
}\r
\r
if(numSpacers != 0){\r
- success = trimOligos.stripSpacer(currSeq, currQual);\r
+ success = trimOligos->stripSpacer(currSeq, currQual);\r
if(success > sdiffs) { trashCode += 's'; }\r
else{ currentSeqsDiffs += success; }\r
\r
}\r
\r
if(numFPrimers != 0){\r
- success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
+ success = trimOligos->stripForward(currSeq, currQual, primer, true);\r
if(success > pdiffs) { trashCode += 'f'; }\r
else{ currentSeqsDiffs += success; }\r
}\r
\r
if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }\r
\r
- if(revPrimer.size() != 0){\r
- success = trimOligos.stripReverse(currSeq, currQual);\r
+ if(numRPrimers != 0){\r
+ success = trimOligos->stripReverse(currSeq, currQual);\r
if(!success) { trashCode += 'r'; }\r
}\r
\r
- if (trashCode.length() == 0) { //is this sequence in the ignore group\r
- string thisGroup = "";\r
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse\r
+ int thisSuccess = 0;\r
+ string thisTrashCode = "";\r
+ int thisCurrentSeqsDiffs = 0;\r
\r
- if(barcodes.size() != 0){\r
- thisGroup = barcodeNameVector[barcode];\r
- if (numFPrimers != 0) {\r
- if (primerNameVector[primer] != "") {\r
- if(thisGroup != "") {\r
- thisGroup += "." + primerNameVector[primer];\r
- }else {\r
- thisGroup = primerNameVector[primer];\r
- }\r
- }\r
- }\r
+ int thisBarcodeIndex = 0;\r
+ int thisPrimerIndex = 0;\r
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+ if(numBarcodes != 0){\r
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);\r
+ if(thisSuccess > bdiffs) { thisTrashCode += "b"; }\r
+ else{ thisCurrentSeqsDiffs += thisSuccess; }\r
+ }\r
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+ if(numFPrimers != 0){\r
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);\r
+ if(thisSuccess > pdiffs) { thisTrashCode += "f"; }\r
+ else{ thisCurrentSeqsDiffs += thisSuccess; }\r
}\r
\r
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }\r
+ \r
+ if (thisTrashCode == "") {\r
+ trashCode = thisTrashCode;\r
+ success = thisSuccess;\r
+ currentSeqsDiffs = thisCurrentSeqsDiffs;\r
+ barcode = thisBarcodeIndex;\r
+ primer = thisPrimerIndex;\r
+ savedSeq.reverseComplement();\r
+ currSeq.setAligned(savedSeq.getAligned());\r
+ savedQual.flipQScores();\r
+ currQual.setScores(savedQual.getScores());\r
+ }else { trashCode += "(" + thisTrashCode + ")"; }\r
+ }\r
+\r
+ if (trashCode.length() == 0) { //is this sequence in the ignore group\r
+ string thisGroup = oligosObject->getGroupName(barcode, primer);\r
+ \r
int pos = thisGroup.find("ignore");\r
if (pos != string::npos) { trashCode += "i"; }\r
}\r
\r
string group = groupMap->getGroup(header.name);\r
if (group == "not found") { trashCode += "g"; } //scrap for group\r
- else { //find file group\r
- map<string, int>::iterator it = barcodes.find(group);\r
- if (it != barcodes.end()) {\r
- barcode = it->second;\r
- }else { trashCode += "g"; }\r
- }\r
\r
return trashCode.length();\r
}\r
numSplitReads.clear();\r
filehandlesHeaders.clear();\r
\r
- ifstream inOligos;\r
- m->openInputFile(oligoFile, inOligos);\r
- \r
- string type, oligo, group;\r
+ bool allBlank = false;\r
+ oligosObject->read(oligoFile);\r
\r
- int indexPrimer = 0;\r
- int indexBarcode = 0;\r
- \r
- while(!inOligos.eof()){\r
- \r
- inOligos >> type;\r
- \r
- if(type[0] == '#'){\r
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there\r
- m->gobble(inOligos);\r
- }\r
- else{\r
- m->gobble(inOligos);\r
- //make type case insensitive\r
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }\r
- \r
- inOligos >> oligo;\r
- \r
- for(int i=0;i<oligo.length();i++){\r
- oligo[i] = toupper(oligo[i]);\r
- if(oligo[i] == 'U') { oligo[i] = 'T'; }\r
- }\r
- \r
- if(type == "FORWARD"){\r
- group = "";\r
- \r
- // get rest of line in case there is a primer name\r
- while (!inOligos.eof()) {\r
- char c = inOligos.get();\r
- if (c == 10 || c == 13 || c == -1){ break; }\r
- else if (c == 32 || c == 9){;} //space or tab\r
- else { group += c; }\r
- }\r
- \r
- //check for repeat barcodes\r
- map<string, int>::iterator itPrime = primers.find(oligo);\r
- if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
- \r
- primers[oligo]=indexPrimer; indexPrimer++;\r
- primerNameVector.push_back(group);\r
- \r
- }else if(type == "REVERSE"){\r
- //Sequence oligoRC("reverse", oligo);\r
- //oligoRC.reverseComplement();\r
- string oligoRC = reverseOligo(oligo);\r
- revPrimer.push_back(oligoRC);\r
- }\r
- else if(type == "BARCODE"){\r
- inOligos >> group;\r
- \r
- \r
- //check for repeat barcodes\r
- map<string, int>::iterator itBar = barcodes.find(oligo);\r
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
- \r
- barcodes[oligo]=indexBarcode; indexBarcode++;\r
- barcodeNameVector.push_back(group);\r
- }else if(type == "LINKER"){\r
- linker.push_back(oligo);\r
- }else if(type == "SPACER"){\r
- spacer.push_back(oligo);\r
- }\r
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
- }\r
- m->gobble(inOligos);\r
- }\r
- inOligos.close();\r
- \r
- if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; }\r
- \r
- //add in potential combos\r
- if(barcodeNameVector.size() == 0){\r
- barcodes[""] = 0;\r
- barcodeNameVector.push_back("");\r
- }\r
- \r
- if(primerNameVector.size() == 0){\r
- primers[""] = 0;\r
- primerNameVector.push_back("");\r
- }\r
- \r
- filehandles.resize(barcodeNameVector.size());\r
+ if (m->control_pressed) { return false; } //error in reading oligos\r
+ \r
+ if (oligosObject->hasPairedBarcodes()) {\r
+ pairedOligos = true;\r
+ m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true;\r
+ }else {\r
+ pairedOligos = false;\r
+ numFPrimers = oligosObject->getPrimers().size();\r
+ numBarcodes = oligosObject->getBarcodes().size();\r
+ }\r
+ \r
+ numLinkers = oligosObject->getLinkers().size();\r
+ numSpacers = oligosObject->getSpacers().size();\r
+ numRPrimers = oligosObject->getReversePrimers().size();\r
+ \r
+ vector<string> groupNames = oligosObject->getGroupNames();\r
+ if (groupNames.size() == 0) { allBlank = true; }\r
+ \r
+ filehandles.resize(oligosObject->getBarcodeNames().size());\r
for(int i=0;i<filehandles.size();i++){\r
- filehandles[i].assign(primerNameVector.size(), "");\r
+ for(int j=0;j<oligosObject->getPrimerNames().size();j++){ filehandles[i].push_back(""); }\r
}\r
\r
- if(split > 1){\r
- set<string> uniqueNames; //used to cleanup outputFileNames\r
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
- \r
- string primerName = primerNameVector[itPrimer->second];\r
- string barcodeName = barcodeNameVector[itBar->second];\r
- \r
+ if(split > 1){\r
+ set<string> uniqueNames; //used to cleanup outputFileNames\r
+ map<string, int> barcodes = oligosObject->getBarcodes() ;\r
+ map<string, int> primers = oligosObject->getPrimers();\r
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+ \r
+ string primerName = oligosObject->getPrimerName(itPrimer->second);\r
+ string barcodeName = oligosObject->getBarcodeName(itBar->second);\r
+ \r
if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing\r
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing\r
else {\r
string comboGroupName = "";\r
string fastaFileName = "";\r
string qualFileName = "";\r
string nameFileName = "";\r
+ string countFileName = "";\r
\r
if(primerName == ""){\r
- comboGroupName = barcodeNameVector[itBar->second];\r
- }\r
- else{\r
+ comboGroupName = barcodeName;\r
+ }else{\r
if(barcodeName == ""){\r
- comboGroupName = primerNameVector[itPrimer->second];\r
+ comboGroupName = primerName;\r
}\r
else{\r
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
+ comboGroupName = barcodeName + "." + primerName;\r
}\r
}\r
\r
filehandles[itBar->second][itPrimer->second] = thisFilename;\r
temp.open(thisFilename.c_str(), ios::binary); temp.close();\r
}\r
- }\r
- }\r
- }\r
- numFPrimers = primers.size();\r
- numLinkers = linker.size();\r
- numSpacers = spacer.size();\r
+ }\r
+ }\r
+ }\r
+ \r
map<string, string> variables;\r
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
variables["[group]"] = "scrap";\r
noMatchFile = getOutputFileName("sff",variables);\r
m->mothurRemove(noMatchFile);\r
numNoMatch = 0;\r
- \r
- \r
- bool allBlank = true;\r
- for (int i = 0; i < barcodeNameVector.size(); i++) {\r
- if (barcodeNameVector[i] != "") {\r
- allBlank = false;\r
- break;\r
- }\r
- }\r
- for (int i = 0; i < primerNameVector.size(); i++) {\r
- if (primerNameVector[i] != "") {\r
- allBlank = false;\r
- break;\r
- }\r
- }\r
\r
filehandlesHeaders.resize(filehandles.size());\r
numSplitReads.resize(filehandles.size());\r
filehandles.clear();\r
numSplitReads.clear();\r
filehandlesHeaders.clear();\r
- barcodes.clear();\r
\r
groupMap = new GroupMap();\r
groupMap->readMap(oligoFile);\r
ofstream temp;\r
m->openOutputFileBinary(thisFilename, temp); temp.close();\r
filehandles[i].push_back(thisFilename);\r
- barcodes[groups[i]] = i;\r
}\r
}\r
\r
#include "command.hpp"
#include "groupmap.h"
+#include "oligos.h"
+#include "trimoligos.h"
/**********************************************************/
private:
string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile;
vector<string> filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames;
- bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup;
- int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
+ bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup, reorient, pairedOligos;
+ int mycount, split, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
set<string> seqNames;
- map<string, int> barcodes;
- map<string, int> primers;
GroupMap* groupMap;
- vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
vector<vector<int> > numSplitReads;
vector<vector<string> > filehandles;
vector<vector<string> > filehandlesHeaders;
+ Oligos* oligosObject;
//extract sff file functions
int extractSffInfo(string, string, string);
int readCommonHeader(ifstream&, CommonHeader&);
int readHeader(ifstream&, Header&);
- bool readSeqData(ifstream&, seqRead&, int, Header&);
+ bool readSeqData(ifstream&, seqRead&, int, Header&, TrimOligos*&, TrimOligos*&);
int decodeName(string&, string&, string&, string);
bool readOligos(string oligosFile);
bool readGroup(string oligosFile);
int parseSffTxt();
bool sanityCheck(Header&, seqRead&);
int adjustCommonHeader(CommonHeader);
- int findGroup(Header header, seqRead read, int& barcode, int& primer);
+ int findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*&, TrimOligos*&);
int findGroup(Header header, seqRead read, int& barcode, int& primer, string);
string reverseOligo(string oligo);
CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
//choose only one multiple options
CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
try {
string helpString = "";
helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
- helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
+ helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
helpString += "The sff parameter is used to provide the original sff file.\n";
helpString += "The fastq parameter is used to provide the original fastq file.\n";
helpString += "The project parameter is used to provide your project file.\n";
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";
helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
outputTypes["xml"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
m->mothurConvert(temp, tdiffs);
if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
+
+ checkorient = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
}
if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
+ if (inputDir != "") {
+ string path = m->hasPath(thisFileName1);
+ if (path == "") { thisFileName1 = inputDir + thisFileName1; }
+
+ path = m->hasPath(thisFileName2);
+ if (path == "") { thisFileName2 = inputDir + thisFileName2; }
+ }
+
//check to make sure both are able to be opened
ifstream in2;
int openForward = m->openInputFile(thisFileName1, in2, "noerror");
//if you can't open it, try default location
if (openForward == 1) {
+
if (m->getDefaultPath() != "") { //default path is set
string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+ if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
m->mothurOutEndLine();
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+ if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
m->mothurOutEndLine();
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
//***************************************************************************************************************
int SRACommand::readOligos(){
try {
- ifstream inOligos;
- m->openInputFile(oligosfile, inOligos);
-
- string type, oligo, roligo, group;
- bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
- map<int, oligosPair> pairedBarcodes;
- map<int, oligosPair> pairedPrimers;
- map<string, int> barcodes;
- map<string, int> primers;
- vector<string> linker;
- vector<string> spacer, revPrimer;
- int indexPrimer = 0;
- int indexBarcode = 0;
- int indexPairedPrimer = 0;
- int indexPairedBarcode = 0;
- set<string> uniquePrimers;
- set<string> uniqueBarcodes;
-
- while(!inOligos.eof()){
-
- inOligos >> type;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-
- if(type[0] == '#'){
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(inOligos);
- }
- else{
- m->gobble(inOligos);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- //check for repeat barcodes
- map<string, int>::iterator itPrime = primers.find(oligo);
- if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
-
- primers[oligo] = indexPrimer; indexPrimer++;
- primerNameVector.push_back(group);
- }
- else if (type == "PRIMER"){
- m->gobble(inOligos);
-
- inOligos >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
- roligo = reverseOligo(roligo);
-
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- oligosPair newPrimer(oligo, roligo);
-
- if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
-
- //check for repeat barcodes
- string tempPair = oligo+roligo;
- if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
- else { uniquePrimers.insert(tempPair); }
-
- if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
-
- pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
- primerNameVector.push_back(group);
- hasPrimer = true;
- }
- else if(type == "REVERSE"){
- //Sequence oligoRC("reverse", oligo);
- //oligoRC.reverseComplement();
- string oligoRC = reverseOligo(oligo);
- revPrimer.push_back(oligoRC);
- }
- else if(type == "BARCODE"){
- inOligos >> group;
-
- //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
- //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
-
- string temp = "";
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { temp += c; }
- }
-
- //then this is illumina data with 4 columns
- if (temp != "") {
- hasPairedBarcodes = true;
- string reverseBarcode = group; //reverseOligo(group); //reverse barcode
- group = temp;
-
- for(int i=0;i<reverseBarcode.length();i++){
- reverseBarcode[i] = toupper(reverseBarcode[i]);
- if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
- }
-
- reverseBarcode = reverseOligo(reverseBarcode);
- oligosPair newPair(oligo, reverseBarcode);
-
- if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
- //check for repeat barcodes
- string tempPair = oligo+reverseBarcode;
- if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
- else { uniqueBarcodes.insert(tempPair); }
-
- pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
- barcodeNameVector.push_back(group);
- }else {
- //check for repeat barcodes
- map<string, int>::iterator itBar = barcodes.find(oligo);
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- barcodes[oligo]=indexBarcode; indexBarcode++;
- barcodeNameVector.push_back(group);
- }
- }else if(type == "LINKER"){
- linker.push_back(oligo);
- }else if(type == "SPACER"){
- spacer.push_back(oligo);
- }
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- if (hasPairedBarcodes || hasPrimer) {
- pairedOligos = true;
- if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
- }
-
+ Oligos oligos(oligosfile);
- //add in potential combos
- if(barcodeNameVector.size() == 0){
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- primerNameVector.push_back("");
- }
+ if (m->control_pressed) { return false; } //error in reading oligos
+
+ if (oligos.hasPairedBarcodes()) { pairedOligos = true; }
+ else { pairedOligos = false; }
+ set<string> uniqueNames; //used to cleanup outputFileNames
if (pairedOligos) {
- for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
- for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+ map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+ map<int, oligosPair> primers = oligos.getPairedPrimers();
+ for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
- string primerName = primerNameVector[itPrimer->first];
- string barcodeName = barcodeNameVector[itBar->first];
+ string primerName = oligos.getPrimerName(itPrimer->first);
+ string barcodeName = oligos.getBarcodeName(itBar->first);
if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
else {
string comboGroupName = "";
- string fastqFileName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->first];
- }
- else{
+ comboGroupName = barcodeName;
+ }else{
if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->first];
+ comboGroupName = primerName;
}
else{
- comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+ comboGroupName = barcodeName + "." + primerName;
}
}
uniqueNames.insert(comboGroupName);
}
}
}else {
+ map<string, int> barcodes = oligos.getBarcodes() ;
+ map<string, int> primers = oligos.getPrimers();
for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
- string primerName = primerNameVector[itPrimer->second];
- string barcodeName = barcodeNameVector[itBar->second];
+ string primerName = oligos.getPrimerName(itPrimer->second);
+ string barcodeName = oligos.getBarcodeName(itBar->second);
if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
else {
string comboGroupName = "";
- string fastqFileName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->second];
- }
- else{
+ comboGroupName = barcodeName;
+ }else{
if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->second];
+ comboGroupName = primerName;
}
else{
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ comboGroupName = barcodeName + "." + primerName;
}
}
uniqueNames.insert(comboGroupName);
}
}
}
-
-
- if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
+ if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
return true;
#include "command.hpp"
#include "trimoligos.h"
+#include "oligos.h"
/**************************************************************************************************/
bool abort, isSFF, pairedOligos;
int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
string sfffile, fastqfile, outputDir, file, oligosfile, contactfile, inputfile, mimarksfile;
- string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType;
+ string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType, checkorient;
string submissionName, lastName, firstName, email, centerName, centerType, description, website, orientation, packageType;
- string projectName, grantId, grantTitle, grantAgency, projectTitle;
+ string projectName, grantId, grantTitle, grantAgency, projectTitle, inputDir;
vector<string> outputNames, Groups;
- vector<string> primerNameVector;
- vector<string> barcodeNameVector;
map<string, vector<string> > Group2Barcode;
map<string, vector<string> > Group2Primer;
map<string, string> Group2Organism;
try {
CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow);
CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos);
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows);
try {
string helpString = "";
helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+ helpString += "The oligos parameter allows you to provide an oligos file.\n";
+ helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
+ helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";
+ helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+ helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
if(oligoFileName == "") { allFiles = 0; }
else { allFiles = 1; }
+
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
+ reorient = m->isTrue(temp);
numFPrimers = 0;
numRPrimers = 0;
int count = 0;
bool moreSeqs = 1;
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
+ TrimOligos* trimOligos = NULL;
+ if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); }
+ else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); }
+
+ TrimOligos* rtrimOligos = NULL;
+ if (reorient) {
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
+ }
+
while(moreSeqs) {
flowData.capFlows(maxFlows);
Sequence currSeq = flowData.getSequence();
- //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
+ //for reorient
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
+
if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows
success = 0;
trashCode += 'l';
int barcodeIndex = 0;
if(numLinkers != 0){
- success = trimOligos.stripLinker(currSeq);
+ success = trimOligos->stripLinker(currSeq);
if(success > ldiffs) { trashCode += 'k'; }
else{ currentSeqDiffs += success; }
if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); }
- if(barcodes.size() != 0){
- success = trimOligos.stripBarcode(currSeq, barcodeIndex);
+ if(numBarcodes != 0){
+ success = trimOligos->stripBarcode(currSeq, barcodeIndex);
if(success > bdiffs) { trashCode += 'b'; }
else{ currentSeqDiffs += success; }
}
if(numSpacers != 0){
- success = trimOligos.stripSpacer(currSeq);
+ success = trimOligos->stripSpacer(currSeq);
if(success > sdiffs) { trashCode += 's'; }
else{ currentSeqDiffs += success; }
}
if(numFPrimers != 0){
- success = trimOligos.stripForward(currSeq, primerIndex);
+ success = trimOligos->stripForward(currSeq, primerIndex);
if(success > pdiffs) { trashCode += 'f'; }
else{ currentSeqDiffs += success; }
}
if (currentSeqDiffs > tdiffs) { trashCode += 't'; }
if(numRPrimers != 0){
- success = trimOligos.stripReverse(currSeq);
+ success = trimOligos->stripReverse(currSeq);
if(!success) { trashCode += 'r'; }
}
-
- if(trashCode.length() == 0){
- string thisGroup = "";
- if(barcodes.size() != 0){
- thisGroup = barcodeNameVector[barcodeIndex];
- if (primers.size() != 0) {
- if (primerNameVector[primerIndex] != "") {
- if(thisGroup != "") {
- thisGroup += "." + primerNameVector[primerIndex];
- }else {
- thisGroup = primerNameVector[primerIndex];
- }
- }
- }
+
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+ int thisSuccess = 0;
+ string thisTrashCode = "";
+ int thisCurrentSeqsDiffs = 0;
+
+ int thisBarcodeIndex = 0;
+ int thisPrimerIndex = 0;
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+ if(numBarcodes != 0){
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, thisBarcodeIndex);
+ if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
}
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+ if(numFPrimers != 0){
+ thisSuccess = rtrimOligos->stripForward(savedSeq, thisPrimerIndex);
+ if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
+
+ if (thisTrashCode == "") {
+ trashCode = thisTrashCode;
+ success = thisSuccess;
+ currentSeqDiffs = thisCurrentSeqsDiffs;
+ barcodeIndex = thisBarcodeIndex;
+ primerIndex = thisPrimerIndex;
+ savedSeq.reverseComplement();
+ currSeq.setAligned(savedSeq.getAligned());
+ }else { trashCode += "(" + thisTrashCode + ")"; }
+ }
+
+ if(trashCode.length() == 0){
+ string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);
int pos = thisGroup.find("ignore");
if (pos == string::npos) {
scrapFlowFile.close();
flowFile.close();
if(fasta){ fastaFile.close(); }
+ delete trimOligos;
+ if (reorient) { delete rtrimOligos; }
return count;
}
//***************************************************************************************************************
-void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
+int TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
try {
- ifstream oligosFile;
- m->openInputFile(oligoFileName, oligosFile);
-
- string type, oligo, group;
-
- int indexPrimer = 0;
- int indexBarcode = 0;
-
- while(!oligosFile.eof()){
-
- oligosFile >> type; //get the first column value of the row - is it a comment or a feature we are interested in?
-
- if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); }
-
- if(type[0] == '#'){ //igore the line because there's a comment
- while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } }
- m->gobble(oligosFile);// get rest of line if there's any crap there
- }
- else{ //there's a feature we're interested in
- m->gobble(oligosFile);
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); } //make type case insensitive
-
- oligosFile >> oligo; //get the DNA sequence for the feature
-
- for(int i=0;i<oligo.length();i++){ //make type case insensitive and change any U's to T's
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); }
-
- if(type == "FORWARD"){ //if the feature is a forward primer...
- group = "";
-
- while (!oligosFile.eof()) { // get rest of line in case there is a primer name = will have the name of the primer
- char c = oligosFile.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- //have we seen this primer already?
- map<string, int>::iterator itPrimer = primers.find(oligo);
- if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- primers[oligo]=indexPrimer; indexPrimer++;
- primerNameVector.push_back(group);
-
- }
- else if(type == "REVERSE"){
- string oligoRC = reverseOligo(oligo);
- revPrimer.push_back(oligoRC);
- if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
- }
- else if(type == "BARCODE"){
- oligosFile >> group;
-
- //check for repeat barcodes
- map<string, int>::iterator itBar = barcodes.find(oligo);
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); }
-
- barcodes[oligo]=indexBarcode; indexBarcode++;
- barcodeNameVector.push_back(group);
- }else if(type == "LINKER"){
- linker.push_back(oligo);
- }else if(type == "SPACER"){
- spacer.push_back(oligo);
- }
- else{
- m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();
- }
- }
-
- m->gobble(oligosFile);
- }
- oligosFile.close();
-
- if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
-
- //add in potential combos
- if(barcodeNameVector.size() == 0){
- barcodes[""] = 0;
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- primers[""] = 0;
- primerNameVector.push_back("");
- }
-
- outFlowFileNames.resize(barcodeNameVector.size());
+ bool allBlank = false;
+ oligos.read(oligoFileName);
+
+ if (m->control_pressed) { return 0; } //error in reading oligos
+
+ if (oligos.hasPairedBarcodes()) {
+ pairedOligos = true;
+ numFPrimers = oligos.getPairedPrimers().size();
+ numBarcodes = oligos.getPairedBarcodes().size();
+ }else {
+ pairedOligos = false;
+ numFPrimers = oligos.getPrimers().size();
+ numBarcodes = oligos.getBarcodes().size();
+ }
+
+ numLinkers = oligos.getLinkers().size();
+ numSpacers = oligos.getSpacers().size();
+ numRPrimers = oligos.getReversePrimers().size();
+
+ vector<string> groupNames = oligos.getGroupNames();
+ if (groupNames.size() == 0) { allFiles = 0; allBlank = true; }
+
+
+ outFlowFileNames.resize(oligos.getBarcodeNames().size());
for(int i=0;i<outFlowFileNames.size();i++){
- outFlowFileNames[i].assign(primerNameVector.size(), "");
+ for(int j=0;j<oligos.getPrimerNames().size();j++){ outFlowFileNames[i].push_back(""); }
}
-
- if(allFiles){
-
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
- string primerName = primerNameVector[itPrimer->second];
- string barcodeName = barcodeNameVector[itBar->second];
-
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else {
- string comboGroupName = "";
- string fileName = "";
+ if (allFiles) {
+ set<string> uniqueNames; //used to cleanup outputFileNames
+ if (pairedOligos) {
+ map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+ map<int, oligosPair> primers = oligos.getPairedPrimers();
+ for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+ string primerName = oligos.getPrimerName(itPrimer->first);
+ string barcodeName = oligos.getBarcodeName(itBar->first);
- if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->second];
- variables["[tag]"] = comboGroupName;
- fileName = getOutputFileName("flow", variables);
- }
- else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->second];
- }
- else{
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeName;
+ }else{
+ if(barcodeName == ""){
+ comboGroupName = primerName;
+ }
+ else{
+ comboGroupName = barcodeName + "." + primerName;
+ }
}
+
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
variables["[tag]"] = comboGroupName;
- fileName = getOutputFileName("flow", variables);
+ string fileName = getOutputFileName("flow", variables);
+ if (uniqueNames.count(fileName) == 0) {
+ outputNames.push_back(fileName);
+ outputTypes["flow"].push_back(fileName);
+ uniqueNames.insert(fileName);
+ }
+
+ outFlowFileNames[itBar->first][itPrimer->first] = fileName;
+ m->openOutputFile(fileName, temp); temp.close();
}
+ }
+ }
+ }else {
+ map<string, int> barcodes = oligos.getBarcodes() ;
+ map<string, int> primers = oligos.getPrimers();
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
- outFlowFileNames[itBar->second][itPrimer->second] = fileName;
+ string primerName = oligos.getPrimerName(itPrimer->second);
+ string barcodeName = oligos.getBarcodeName(itBar->second);
- ofstream temp;
- m->openOutputFile(fileName, temp);
- temp.close();
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeName;
+ }else{
+ if(barcodeName == ""){
+ comboGroupName = primerName;
+ }
+ else{
+ comboGroupName = barcodeName + "." + primerName;
+ }
+ }
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+ variables["[tag]"] = comboGroupName;
+ string fileName = getOutputFileName("flow", variables);
+ if (uniqueNames.count(fileName) == 0) {
+ outputNames.push_back(fileName);
+ outputTypes["flow"].push_back(fileName);
+ uniqueNames.insert(fileName);
+ }
+
+ outFlowFileNames[itBar->second][itPrimer->second] = fileName;
+ m->openOutputFile(fileName, temp); temp.close();
+ }
}
- }
- }
- }
-
- numFPrimers = primers.size();
- numRPrimers = revPrimer.size();
- numLinkers = linker.size();
- numSpacers = spacer.size();
-
- }
- catch(exception& e) {
- m->errorOut(e, "TrimSeqsCommand", "getOligos");
- exit(1);
- }
-}
-//********************************************************************/
-string TrimFlowsCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
+ }
+ }
- else { reverse += 'N'; }
}
-
-
- return reverse;
- }
+ return 0;
+ }
catch(exception& e) {
- m->errorOut(e, "TrimFlowsCommand", "reverseOligo");
+ m->errorOut(e, "TrimFlowsCommand", "getOligos");
exit(1);
}
}
-
/**************************************************************************************************/
vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
//Windows version shared memory, so be careful when passing variables through the trimFlowData struct.
//Above fork() will clone, so memory is separate, but that's not the case with windows,
//////////////////////////////////////////////////////////////////////////////////////////////////////
-
+ /*
vector<trimFlowData*> pDataArray;
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
-
+ */
#endif
//append files
#include "flowdata.h"
#include "groupmap.h"
#include "trimoligos.h"
+#include "oligos.h"
class TrimFlowsCommand : public Command {
public:
int comboStarts;
vector<int> processIDS; //processid
vector<linePair*> lines;
-
- vector<unsigned long long> getFlowFileBreaks();
- int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >);
- int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
- string reverseOligo(string);
-
- vector<string> outputNames;
+ vector<string> outputNames;
set<string> filesToRemove;
-
- void getOligos(vector<vector<string> >&); //a rewrite of what is in trimseqscommand.h
-
- bool allFiles;
+ bool allFiles;
int processors;
- int numFPrimers, numRPrimers;
+ int numFPrimers, numRPrimers, numBarcodes;
int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers;
int numFlows;
float signal, noise;
- bool fasta;
- string flowOrder;
-
- string flowFileName, oligoFileName, outputDir;
+ bool fasta, pairedOligos, reorient;
+ string flowOrder, flowFileName, oligoFileName, outputDir;
+ Oligos oligos;
- map<string, int> barcodes;
- map<string, int> primers;
- vector<string> revPrimer;
- vector<string> linker;
- vector<string> spacer;
-
- vector<string> primerNameVector; //needed here?
- vector<string> barcodeNameVector; //needed here?
- map<string, int> combos; //needed here?
- map<string, int> groupToIndex; //needed here?
+ vector<unsigned long long> getFlowFileBreaks();
+ int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >);
+ int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
+ int getOligos(vector<vector<string> >&); //a rewrite of what is in trimseqscommand.h
+
+
};
-/**************************************************************************************************/
+/**************************************************************************************************
//custom data structure for threads to use.
// This is passed by void pointer so it can be any data type
// that can be passed using a single void pointer (LPVOID).
}
};
-/**************************************************************************************************/
+/**************************************************************************************************
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){
}
}
#endif
-
+*/
#endif
if(minDiff > bdiffs) { success = minDiff; } //no good matches
else {
+ if (m->debug) {
+ string fMatches = "";
+ for (int i = 0; i < minFGroup.size(); i++) { for (int j = 0; j < minFGroup[i].size(); j++) { fMatches += toString(minFGroup[i][j]) + " "; } }
+ m->mothurOut("[DEBUG]: forward matches = " + fMatches + "\n");
+ string rMatches = "";
+ for (int i = 0; i < minRGroup.size(); i++) { for (int j = 0; j < minRGroup[i].size(); j++) { rMatches += toString(minRGroup[i][j]) + " "; } }
+ m->mothurOut("[DEBUG]: reverse matches = " + rMatches + "\n");
+ }
bool foundMatch = false;
vector<int> matches;
for (int i = 0; i < minFGroup.size(); i++) {
group = matches[0];
fStart = minFPos[0];
rStart = minRPos[0];
+ if (m->debug) { m->mothurOut("[DEBUG]: found match = " + toString(matches[0]) + ".\n"); }
+ }else {
+ if (m->debug) { m->mothurOut("[DEBUG]: failed too many matches.\n"); }
}
//we have an acceptable match for the forward and reverse, but do they match?
exit(1);
}
+}
+//*******************************************************************/
+int TrimOligos::stripPairedBarcode(Sequence& seq, int& group){
+ try {
+ //look for forward barcode
+ string rawSeq = seq.getUnaligned();
+ int success = bdiffs + 1; //guilty until proven innocent
+ //cout << seq.getName() << endl;
+ //can you find the forward barcode
+ for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
+ string foligo = it->second.forward;
+ string roligo = it->second.reverse;
+ //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+ //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+ if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+ if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+
+ if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
+
+ if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+ group = it->first;
+ string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+ seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+ success = 0;
+ break;
+ }
+ }
+ //cout << "success=" << success << endl;
+ //if you found the barcode or if you don't want to allow for diffs
+ if ((bdiffs == 0) || (success == 0)) { return success; }
+ else { //try aligning and see if you can find it
+ Alignment* alignment;
+ if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ int minDiff = 1e6;
+ int minCount = 1;
+ vector< vector<int> > minFGroup;
+ vector<int> minFPos;
+
+ //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+ /*
+ 1 Sarah Westcott
+ 2 John Westcott
+ 3 Anna Westcott
+ 4 Sarah Schloss
+ 5 Pat Schloss
+ 6 Gail Brown
+ 7 Pat Moore
+ only want to look for forward = Sarah, John, Anna, Pat, Gail
+ reverse = Westcott, Schloss, Brown, Moore
+ but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
+ */
+ //cout << endl << forwardSeq.getName() << endl;
+ for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
+ string oligo = it->first;
+
+ if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10;
+ break;
+ }
+ //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = bdiffs + 100; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }
+ }
+
+ //cout << minDiff << '\t' << minCount << '\t' << endl;
+ if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ else{
+ //check for reverse match
+ if (alignment != NULL) { delete alignment; }
+
+ if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ minDiff = 1e6;
+ minCount = 1;
+ vector< vector<int> > minRGroup;
+ vector<int> minRPos;
+
+ string rawRSequence = reverseOligo(seq.getUnaligned());
+ //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
+ for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
+ string oligo = reverseOligo(it->first);
+ //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
+ if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10;
+ break;
+ }
+
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = bdiffs + 100; }
+
+ //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
+ }
+
+ }
+
+ if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ else {
+ bool foundMatch = false;
+ vector<int> matches;
+ for (int i = 0; i < minFGroup.size(); i++) {
+ for (int j = 0; j < minFGroup[i].size(); j++) {
+ for (int k = 0; k < minRGroup.size(); k++) {
+ if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+ }
+ }
+ }
+
+ int fStart = 0;
+ int rStart = 0;
+ if (matches.size() == 1) {
+ foundMatch = true;
+ group = matches[0];
+ fStart = minFPos[0];
+ rStart = rawSeq.length() - minRPos[0];
+ if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
+ }
+
+ //we have an acceptable match for the forward and reverse, but do they match?
+ if (foundMatch) {
+ string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+ seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+ success = minDiff;
+ //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
+ }else { success = bdiffs + 100; }
+ }
+ }
+
+ if (alignment != NULL) { delete alignment; }
+ }
+
+ return success;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "stripPairedBarcode");
+ exit(1);
+ }
+
}
//*******************************************************************/
int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
}
}
-
+//*******************************************************************/
+int TrimOligos::stripPairedPrimers(Sequence& seq, int& group){
+ try {
+ //look for forward
+ string rawSeq = seq.getUnaligned();
+ int success = pdiffs + 1; //guilty until proven innocent
+ //cout << seq.getName() << endl;
+ //can you find the forward
+ for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
+ string foligo = it->second.forward;
+ string roligo = it->second.reverse;
+
+ //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+ //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+ if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+ if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+
+ if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
+
+ if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+ group = it->first;
+ string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+ seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+ success = 0;
+ break;
+ }
+ }
+ //cout << "success=" << success << endl;
+ //if you found the barcode or if you don't want to allow for diffs
+ if ((pdiffs == 0) || (success == 0)) { return success; }
+ else { //try aligning and see if you can find it
+ Alignment* alignment;
+ if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ int minDiff = 1e6;
+ int minCount = 1;
+ vector< vector<int> > minFGroup;
+ vector<int> minFPos;
+
+ //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+ /*
+ 1 Sarah Westcott
+ 2 John Westcott
+ 3 Anna Westcott
+ 4 Sarah Schloss
+ 5 Pat Schloss
+ 6 Gail Brown
+ 7 Pat Moore
+ only want to look for forward = Sarah, John, Anna, Pat, Gail
+ reverse = Westcott, Schloss, Brown, Moore
+ but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
+ */
+ //cout << endl << forwardSeq.getName() << endl;
+ for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
+ string oligo = it->first;
+
+ if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10;
+ break;
+ }
+ //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = pdiffs + 100; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }
+ }
+
+ //cout << minDiff << '\t' << minCount << '\t' << endl;
+ if(minDiff > pdiffs) { success = minDiff; } //no good matches
+ else{
+ //check for reverse match
+ if (alignment != NULL) { delete alignment; }
+
+ if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ minDiff = 1e6;
+ minCount = 1;
+ vector< vector<int> > minRGroup;
+ vector<int> minRPos;
+
+ string rawRSequence = reverseOligo(seq.getUnaligned());
+
+ for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
+ string oligo = reverseOligo(it->first);
+ //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
+ if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10;
+ break;
+ }
+
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = pdiffs + 100; }
+
+ //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
+ }
+
+ }
+
+ if(minDiff > pdiffs) { success = minDiff; } //no good matches
+ else {
+ bool foundMatch = false;
+ vector<int> matches;
+ for (int i = 0; i < minFGroup.size(); i++) {
+ for (int j = 0; j < minFGroup[i].size(); j++) {
+ for (int k = 0; k < minRGroup.size(); k++) {
+ if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+ }
+ }
+ }
+
+ int fStart = 0;
+ int rStart = 0;
+ if (matches.size() == 1) {
+ foundMatch = true;
+ group = matches[0];
+ fStart = minFPos[0];
+ rStart = rawSeq.length() - minRPos[0];
+ if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
+ }
+
+ //we have an acceptable match for the forward and reverse, but do they match?
+ if (foundMatch) {
+ string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+ seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+ success = minDiff;
+ //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
+ }else { success = pdiffs + 100; }
+ }
+ }
+
+ if (alignment != NULL) { delete alignment; }
+ }
+
+ return success;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "stripPairedPrimers");
+ exit(1);
+ }
+
+}
//*******************************************************************/
int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
try {
int TrimOligos::stripBarcode(Sequence& seq, int& group){
try {
+ if (paired) { int success = stripPairedBarcode(seq, group); return success; }
+
string rawSequence = seq.getUnaligned();
int success = bdiffs + 1; //guilty until proven innocent
int TrimOligos::stripForward(Sequence& seq, int& group){
try {
+ if (paired) { int success = stripPairedPrimers(seq, group); return success; }
+
string rawSequence = seq.getUnaligned();
int success = pdiffs + 1; //guilty until proven innocent
#include "sequence.hpp"
#include "qualityscores.h"
-struct oligosPair {
- string forward;
- string reverse;
-
- oligosPair() { forward = ""; reverse = ""; }
- oligosPair(string f, string r) : forward(f), reverse(r) {}
- ~oligosPair() {}
-};
class TrimOligos {
int stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group);
int stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool);
+ int stripPairedBarcode(Sequence& seq, int& group);
+ int stripPairedPrimers(Sequence& seq, int& group);
+
};
#endif
#include "trimoligos.h"
+
//**********************************************************************************************************************
vector<string> TrimSeqsCommand::setParameters(){
try {
helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
- helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
helpString += "The oligos parameter allows you to provide an oligos file.\n";
helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
vector<vector<string> > fastaFileNames;
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
+ map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
string outputGroupFileName;
if(oligoFile != ""){
- createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
+ createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames, uniqueFastaNames);
if ((createGroup) && (countfile == "")){
map<string, string> myvariables;
myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
if (m->control_pressed) { return 0; }
if(allFiles){
- map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
map<string, string>::iterator it;
set<string> namesToRemove;
for(int i=0;i<fastaFileNames.size();i++){
m->mothurRemove(nameFileNames[i][j]);
namesToRemove.insert(nameFileNames[i][j]);
}
- }else{
- it = uniqueFastaNames.find(fastaFileNames[i][j]);
- if (it == uniqueFastaNames.end()) {
- uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
- }
+ uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print
}
}
}
int count = 0;
bool moreSeqs = 1;
- int numBarcodes = barcodes.size();
TrimOligos* trimOligos = NULL;
- if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
- else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
+ if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); }
+ else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); }
TrimOligos* rtrimOligos = NULL;
if (reorient) {
- //create reoriented primer and barcode pairs
- map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
- for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
- oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
- rpairedPrimers[it->first] = tempPair;
- //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
- }
- for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
- oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
- rpairedBarcodes[it->first] = tempPair;
- //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
- }
- int index = rpairedBarcodes.size();
- for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
- oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
- rpairedBarcodes[index] = tempPair; index++;
- //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
- }
-
- index = rpairedPrimers.size();
- for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
- oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
- rpairedPrimers[index] = tempPair; index++;
- //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
- }
-
- rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
}
while (moreSeqs) {
if(trashCode.length() == 0){
string thisGroup = "";
- if (createGroup) {
- if(numBarcodes != 0){
- thisGroup = barcodeNameVector[barcodeIndex];
- if (numFPrimers != 0) {
- if (primerNameVector[primerIndex] != "") {
- if(thisGroup != "") {
- thisGroup += "." + primerNameVector[primerIndex];
- }else {
- thisGroup = primerNameVector[primerIndex];
- }
- }
- }
- }
- }
+ if (createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); }
int pos = thisGroup.find("ignore");
if (pos == string::npos) {
tempPrimerQualFileNames,
tempNameFileNames,
lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
- pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
- primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
+ pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, oligoFile,
+ createGroup, allFiles, keepforward, keepFirst, removeLast,
qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform,
minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
pDataArray.push_back(tempTrim);
//***************************************************************************************************************
-bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
+bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames, map<string, string>& fastaFile2Group){
try {
- ifstream inOligos;
+
+ bool allBlank = false;
+ oligos.read(oligoFile);
+
+ if (m->control_pressed) { return false; } //error in reading oligos
+
+ if (oligos.hasPairedBarcodes()) {
+ pairedOligos = true;
+ numFPrimers = oligos.getPairedPrimers().size();
+ numBarcodes = oligos.getPairedBarcodes().size();
+ }else {
+ pairedOligos = false;
+ numFPrimers = oligos.getPrimers().size();
+ numBarcodes = oligos.getBarcodes().size();
+ }
+
+ numLinkers = oligos.getLinkers().size();
+ numSpacers = oligos.getSpacers().size();
+ numRPrimers = oligos.getReversePrimers().size();
+
+ vector<string> groupNames = oligos.getGroupNames();
+ if (groupNames.size() == 0) { allFiles = 0; allBlank = true; }
+
+
+ fastaFileNames.resize(oligos.getBarcodeNames().size());
+ for(int i=0;i<fastaFileNames.size();i++){
+ for(int j=0;j<oligos.getPrimerNames().size();j++){ fastaFileNames[i].push_back(""); }
+ }
+
+ if(qFileName != "") { qualFileNames = fastaFileNames; }
+ if(nameFile != "") { nameFileNames = fastaFileNames; }
+
+
+ if (allFiles) {
+ set<string> uniqueNames; //used to cleanup outputFileNames
+ if (pairedOligos) {
+ map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+ map<int, oligosPair> primers = oligos.getPairedPrimers();
+ for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = oligos.getPrimerName(itPrimer->first);
+ string barcodeName = oligos.getBarcodeName(itBar->first);
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeName;
+ }else{
+ if(barcodeName == ""){
+ comboGroupName = primerName;
+ }
+ else{
+ comboGroupName = barcodeName + "." + primerName;
+ }
+ }
+
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+ variables["[tag]"] = comboGroupName;
+ fastaFileName = getOutputFileName("fasta", variables);
+ if (uniqueNames.count(fastaFileName) == 0) {
+ outputNames.push_back(fastaFileName);
+ outputTypes["fasta"].push_back(fastaFileName);
+ uniqueNames.insert(fastaFileName);
+ fastaFile2Group[fastaFileName] = comboGroupName;
+ }
+
+ fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
+ m->openOutputFile(fastaFileName, temp); temp.close();
+
+ if(qFileName != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+ qualFileName = getOutputFileName("qfile", variables);
+ if (uniqueNames.count(qualFileName) == 0) {
+ outputNames.push_back(qualFileName);
+ outputTypes["qfile"].push_back(qualFileName);
+ }
+
+ qualFileNames[itBar->first][itPrimer->first] = qualFileName;
+ m->openOutputFile(qualFileName, temp); temp.close();
+ }
+
+ if(nameFile != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+ nameFileName = getOutputFileName("name", variables);
+ if (uniqueNames.count(nameFileName) == 0) {
+ outputNames.push_back(nameFileName);
+ outputTypes["name"].push_back(nameFileName);
+ }
+
+ nameFileNames[itBar->first][itPrimer->first] = nameFileName;
+ m->openOutputFile(nameFileName, temp); temp.close();
+ }
+ }
+ }
+ }
+ }else {
+ map<string, int> barcodes = oligos.getBarcodes() ;
+ map<string, int> primers = oligos.getPrimers();
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = oligos.getPrimerName(itPrimer->second);
+ string barcodeName = oligos.getBarcodeName(itBar->second);
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeName;
+ }else{
+ if(barcodeName == ""){
+ comboGroupName = primerName;
+ }
+ else{
+ comboGroupName = barcodeName + "." + primerName;
+ }
+ }
+
+
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+ variables["[tag]"] = comboGroupName;
+ fastaFileName = getOutputFileName("fasta", variables);
+ if (uniqueNames.count(fastaFileName) == 0) {
+ outputNames.push_back(fastaFileName);
+ outputTypes["fasta"].push_back(fastaFileName);
+ uniqueNames.insert(fastaFileName);
+ fastaFile2Group[fastaFileName] = comboGroupName;
+ }
+
+ fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
+ m->openOutputFile(fastaFileName, temp); temp.close();
+
+ if(qFileName != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+ qualFileName = getOutputFileName("qfile", variables);
+ if (uniqueNames.count(qualFileName) == 0) {
+ outputNames.push_back(qualFileName);
+ outputTypes["qfile"].push_back(qualFileName);
+ }
+
+ qualFileNames[itBar->second][itPrimer->second] = qualFileName;
+ m->openOutputFile(qualFileName, temp); temp.close();
+ }
+
+ if(nameFile != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+ nameFileName = getOutputFileName("name", variables);
+ if (uniqueNames.count(nameFileName) == 0) {
+ outputNames.push_back(nameFileName);
+ outputTypes["name"].push_back(nameFileName);
+ }
+
+ nameFileNames[itBar->second][itPrimer->second] = nameFileName;
+ m->openOutputFile(nameFileName, temp); temp.close();
+ }
+ }
+ }
+ }
+ }
+
+ }
+
+
+
+ if (allBlank) {
+ m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
+ allFiles = false;
+ return false;
+ }
+
+
+
+ /*ifstream inOligos;
m->openInputFile(oligoFile, inOligos);
ofstream test;
while(!inOligos.eof()){
- inOligos >> type;
+ inOligos >> type;
if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
}
}
}
- numFPrimers = primers.size();
- if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
- numRPrimers = revPrimer.size();
- numLinkers = linker.size();
- numSpacers = spacer.size();
-
- bool allBlank = true;
- for (int i = 0; i < barcodeNameVector.size(); i++) {
- if (barcodeNameVector[i] != "") {
- allBlank = false;
- break;
- }
- }
- for (int i = 0; i < primerNameVector.size(); i++) {
- if (primerNameVector[i] != "") {
- allBlank = false;
- break;
- }
- }
-
- if (allBlank) {
- m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
- allFiles = false;
- return false;
- }
-
- return true;
+ */
+ return true;
}
catch(exception& e) {
}
}
-//********************************************************************/
-string TrimSeqsCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
-
- else { reverse += 'N'; }
- }
-
-
- return reverse;
- }
- catch(exception& e) {
- m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
- exit(1);
- }
-}
//***************************************************************************************************************
#include "qualityscores.h"
#include "trimoligos.h"
#include "counttable.h"
+#include "oligos.h"
class TrimSeqsCommand : public Command {
linePair() {}
};
- bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
+ Oligos oligos;
+ bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&, map<string, string>&);
bool keepFirstTrim(Sequence&, QualityScores&);
bool removeLastTrim(Sequence&, QualityScores&);
bool cullLength(Sequence&);
bool cullHomoP(Sequence&);
bool cullAmbigs(Sequence&);
- string reverseOligo(string);
-
bool abort, createGroup;
string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
- int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
+ int numBarcodes, numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
int qWindowSize, qWindowStep, keepFirst, removeLast;
double qRollAverage, qThreshold, qWindowAverage, qAverage;
- vector<string> revPrimer, outputNames;
set<string> filesToRemove;
- map<int, oligosPair> pairedBarcodes;
- map<int, oligosPair> pairedPrimers;
- map<string, int> barcodes;
- vector<string> groupVector;
- map<string, int> primers;
- vector<string> linker;
- vector<string> spacer;
- map<string, int> combos;
- map<string, int> groupToIndex;
- vector<string> primerNameVector; //needed here?
- vector<string> barcodeNameVector; //needed here?
+ vector<string> groupVector,outputNames;
map<string, int> groupCounts;
map<string, string> nameMap;
map<string, int> nameCount; //for countfile name -> repCount
struct trimData {
unsigned long long start, end;
MothurOut* m;
- string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
+ string filename, qFileName, oligosfile, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
vector<vector<string> > fastaFileNames;
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
- bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform;
- int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
+ bool flip, allFiles, qtrim, keepforward, createGroup, reorient, logtransform;
+ int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
int qWindowSize, qWindowStep, keepFirst, removeLast, count;
double qRollAverage, qThreshold, qWindowAverage, qAverage;
- vector<string> revPrimer;
- map<string, int> barcodes;
- map<string, int> primers;
map<string, int> nameCount;
- vector<string> linker;
- vector<string> spacer;
- map<string, int> combos;
- vector<string> primerNameVector;
- vector<string> barcodeNameVector;
map<string, int> groupCounts;
map<string, string> nameMap;
map<string, string> groupMap;
- map<int, oligosPair> pairedBarcodes;
- map<int, oligosPair> pairedPrimers;
trimData(){}
trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout,
- int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
- vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
+ int pd, int bd, int ld, int sd, int td, string o, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
filename = fn;
qlineEnd = qend;
m = mout;
nameCount = ncount;
+ oligosfile = o;
pdiffs = pd;
bdiffs = bd;
ldiffs = ld;
sdiffs = sd;
tdiffs = td;
- barcodes = bar;
- pairedPrimers = ppr;
- pairedBarcodes = pbr;
- pairedOligos = po;
- primers = pri; numFPrimers = primers.size();
- revPrimer = revP; numRPrimers = revPrimer.size();
- linker = li; numLinkers = linker.size();
- spacer = spa; numSpacers = spacer.size();
- primerNameVector = priNameVector;
- barcodeNameVector = barNameVector;
createGroup = cGroup;
allFiles = aFiles;
}
}
+ int numBarcodes, numLinkers, numSpacers, numRPrimers, numFPrimers;
+ bool pairedOligos;
+ Oligos oligos(pDataArray->oligosfile);
+ if (oligos.hasPairedBarcodes()) {
+ pairedOligos = true;
+ numFPrimers = oligos.getPairedPrimers().size();
+ numBarcodes = oligos.getPairedBarcodes().size();
+ }else {
+ pairedOligos = false;
+ numFPrimers = oligos.getPrimers().size();
+ numBarcodes = oligos.getBarcodes().size();
+ }
+
+ numLinkers = oligos.getLinkers().size();
+ numSpacers = oligos.getSpacers().size();
+ numRPrimers = oligos.getReversePrimers().size();
+
TrimOligos* trimOligos = NULL;
- int numBarcodes = pDataArray->barcodes.size();
- if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
- else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); }
+ if (pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); }
+ else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); }
TrimOligos* rtrimOligos = NULL;
if (pDataArray->reorient) {
- //create reoriented primer and barcode pairs
- map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
- for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
- oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
- rpairedPrimers[it->first] = tempPair;
- }
- for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
- oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
- rpairedBarcodes[it->first] = tempPair;
- }
-
- int index = rpairedBarcodes.size();
- for (map<string, int>::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) {
- oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
- rpairedBarcodes[index] = tempPair; index++;
- }
-
- index = rpairedPrimers.size();
- for (map<string, int>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
- oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
- rpairedPrimers[index] = tempPair; index++;
- }
-
- rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+ rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
}
pDataArray->count = 0;
int barcodeIndex = 0;
int primerIndex = 0;
- if(pDataArray->numLinkers != 0){
+ if(numLinkers != 0){
success = trimOligos->stripLinker(currSeq, currQual);
if(success > pDataArray->ldiffs) { trashCode += 'k'; }
else{ currentSeqsDiffs += success; }
else{ currentSeqsDiffs += success; }
}
- if(pDataArray->numSpacers != 0){
+ if(numSpacers != 0){
success = trimOligos->stripSpacer(currSeq, currQual);
if(success > pDataArray->sdiffs) { trashCode += 's'; }
else{ currentSeqsDiffs += success; }
}
- if(pDataArray->numFPrimers != 0){
+ if(numFPrimers != 0){
success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
if(success > pDataArray->pdiffs) { trashCode += 'f'; }
else{ currentSeqsDiffs += success; }
if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; }
- if(pDataArray->numRPrimers != 0){
+ if(numRPrimers != 0){
success = trimOligos->stripReverse(currSeq, currQual);
if(!success) { trashCode += 'r'; }
}
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
- if(pDataArray->numFPrimers != 0){
+ if(numFPrimers != 0){
thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
if(trashCode.length() == 0){
string thisGroup = "";
- if (pDataArray->createGroup) {
- if(numBarcodes != 0){
- thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
- if (pDataArray->numFPrimers != 0) {
- if (pDataArray->primerNameVector[primerIndex] != "") {
- if(thisGroup != "") {
- thisGroup += "." + pDataArray->primerNameVector[primerIndex];
- }else {
- thisGroup = pDataArray->primerNameVector[primerIndex];
- }
- }
- }
- }
- }
+ if (pDataArray->createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); }
int pos = thisGroup.find("ignore");
if (pos == string::npos) {