try { \r
CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);\r
CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
try {\r
string helpString = "";\r
helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
- helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+ helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n";\r
helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n";\r
helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n";\r
helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n";\r
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";\r
helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n";\r
helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n";\r
helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }\r
}\r
\r
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }\r
+ reorient = m->isTrue(temp);\r
\r
}\r
}\r
//**********************************************************************************************************************\r
int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
try {\r
+ oligosObject = new Oligos();\r
currentFileName = input;\r
if (outputDir == "") { outputDir += m->hasPath(input); }\r
\r
if (accnos != "") { readAccnosFile(accnos); }\r
else { seqNames.clear(); }\r
\r
- if (hasOligos) { readOligos(oligos); split = 2; }\r
+ TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;\r
+ if (hasOligos) {\r
+ readOligos(oligos); split = 2;\r
+ if (m->control_pressed) { delete oligosObject; return 0; }\r
+ trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers()); numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size();\r
+ if (reorient) {\r
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size();\r
+ }\r
+ }\r
if (hasGroup) { readGroup(oligos); split = 2; }\r
\r
ofstream outSfftxt, outFasta, outQual, outFlow;\r
int count = 0;\r
\r
//check magic number and version\r
- if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
- if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
+ if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; }\r
+ if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; }\r
\r
//print common header\r
if (sfftxt) { printCommonHeader(outSfftxt, header); }\r
\r
//read data\r
seqRead read; Header readheader;\r
- readSeqData(in, read, header.numFlowsPerRead, readheader);\r
+ readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos);\r
\r
bool okay = sanityCheck(readheader, read);\r
if (!okay) { break; }\r
else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
}\r
\r
+ delete oligosObject;\r
+ if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } }\r
+ \r
return count;\r
}\r
catch(exception& e) {\r
}\r
}\r
//**********************************************************************************************************************\r
-bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
+bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){\r
try {\r
unsigned long long startSpotInFile = in.tellg();\r
if (!in.eof()) {\r
\r
int barcodeIndex, primerIndex, trashCodeLength;\r
\r
- if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex); }\r
+ if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos); }\r
else if (hasGroup) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode"); }\r
else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
\r
}\r
}\r
//**********************************************************************************************************************\r
-int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) {\r
try {\r
- //find group read belongs to\r
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
\r
int success = 1;\r
string trashCode = "";\r
Sequence currSeq(header.name, seq);\r
QualityScores currQual;\r
\r
+ //for reorient\r
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());\r
+ QualityScores savedQual(currQual.getName(), currQual.getScores());\r
+ \r
if(numLinkers != 0){\r
- success = trimOligos.stripLinker(currSeq, currQual);\r
+ success = trimOligos->stripLinker(currSeq, currQual);\r
if(success > ldiffs) { trashCode += 'k'; }\r
else{ currentSeqsDiffs += success; }\r
\r
}\r
\r
- if(barcodes.size() != 0){\r
- success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
+ if(numBarcodes != 0){\r
+ success = trimOligos->stripBarcode(currSeq, currQual, barcode);\r
if(success > bdiffs) { trashCode += 'b'; }\r
else{ currentSeqsDiffs += success; }\r
}\r
\r
if(numSpacers != 0){\r
- success = trimOligos.stripSpacer(currSeq, currQual);\r
+ success = trimOligos->stripSpacer(currSeq, currQual);\r
if(success > sdiffs) { trashCode += 's'; }\r
else{ currentSeqsDiffs += success; }\r
\r
}\r
\r
if(numFPrimers != 0){\r
- success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
+ success = trimOligos->stripForward(currSeq, currQual, primer, true);\r
if(success > pdiffs) { trashCode += 'f'; }\r
else{ currentSeqsDiffs += success; }\r
}\r
\r
if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }\r
\r
- if(revPrimer.size() != 0){\r
- success = trimOligos.stripReverse(currSeq, currQual);\r
+ if(numRPrimers != 0){\r
+ success = trimOligos->stripReverse(currSeq, currQual);\r
if(!success) { trashCode += 'r'; }\r
}\r
\r
- if (trashCode.length() == 0) { //is this sequence in the ignore group\r
- string thisGroup = "";\r
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse\r
+ int thisSuccess = 0;\r
+ string thisTrashCode = "";\r
+ int thisCurrentSeqsDiffs = 0;\r
\r
- if(barcodes.size() != 0){\r
- thisGroup = barcodeNameVector[barcode];\r
- if (numFPrimers != 0) {\r
- if (primerNameVector[primer] != "") {\r
- if(thisGroup != "") {\r
- thisGroup += "." + primerNameVector[primer];\r
- }else {\r
- thisGroup = primerNameVector[primer];\r
- }\r
- }\r
- }\r
+ int thisBarcodeIndex = 0;\r
+ int thisPrimerIndex = 0;\r
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+ if(numBarcodes != 0){\r
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);\r
+ if(thisSuccess > bdiffs) { thisTrashCode += "b"; }\r
+ else{ thisCurrentSeqsDiffs += thisSuccess; }\r
+ }\r
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+ if(numFPrimers != 0){\r
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);\r
+ if(thisSuccess > pdiffs) { thisTrashCode += "f"; }\r
+ else{ thisCurrentSeqsDiffs += thisSuccess; }\r
}\r
\r
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }\r
+ \r
+ if (thisTrashCode == "") {\r
+ trashCode = thisTrashCode;\r
+ success = thisSuccess;\r
+ currentSeqsDiffs = thisCurrentSeqsDiffs;\r
+ barcode = thisBarcodeIndex;\r
+ primer = thisPrimerIndex;\r
+ savedSeq.reverseComplement();\r
+ currSeq.setAligned(savedSeq.getAligned());\r
+ savedQual.flipQScores();\r
+ currQual.setScores(savedQual.getScores());\r
+ }else { trashCode += "(" + thisTrashCode + ")"; }\r
+ }\r
+\r
+ if (trashCode.length() == 0) { //is this sequence in the ignore group\r
+ string thisGroup = oligosObject->getGroupName(barcode, primer);\r
+ \r
int pos = thisGroup.find("ignore");\r
if (pos != string::npos) { trashCode += "i"; }\r
}\r
\r
string group = groupMap->getGroup(header.name);\r
if (group == "not found") { trashCode += "g"; } //scrap for group\r
- else { //find file group\r
- map<string, int>::iterator it = barcodes.find(group);\r
- if (it != barcodes.end()) {\r
- barcode = it->second;\r
- }else { trashCode += "g"; }\r
- }\r
\r
return trashCode.length();\r
}\r
numSplitReads.clear();\r
filehandlesHeaders.clear();\r
\r
- ifstream inOligos;\r
- m->openInputFile(oligoFile, inOligos);\r
- \r
- string type, oligo, group;\r
+ bool allBlank = false;\r
+ oligosObject->read(oligoFile);\r
\r
- int indexPrimer = 0;\r
- int indexBarcode = 0;\r
- \r
- while(!inOligos.eof()){\r
- \r
- inOligos >> type;\r
- \r
- if(type[0] == '#'){\r
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there\r
- m->gobble(inOligos);\r
- }\r
- else{\r
- m->gobble(inOligos);\r
- //make type case insensitive\r
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }\r
- \r
- inOligos >> oligo;\r
- \r
- for(int i=0;i<oligo.length();i++){\r
- oligo[i] = toupper(oligo[i]);\r
- if(oligo[i] == 'U') { oligo[i] = 'T'; }\r
- }\r
- \r
- if(type == "FORWARD"){\r
- group = "";\r
- \r
- // get rest of line in case there is a primer name\r
- while (!inOligos.eof()) {\r
- char c = inOligos.get();\r
- if (c == 10 || c == 13 || c == -1){ break; }\r
- else if (c == 32 || c == 9){;} //space or tab\r
- else { group += c; }\r
- }\r
- \r
- //check for repeat barcodes\r
- map<string, int>::iterator itPrime = primers.find(oligo);\r
- if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
- \r
- primers[oligo]=indexPrimer; indexPrimer++;\r
- primerNameVector.push_back(group);\r
- \r
- }else if(type == "REVERSE"){\r
- //Sequence oligoRC("reverse", oligo);\r
- //oligoRC.reverseComplement();\r
- string oligoRC = reverseOligo(oligo);\r
- revPrimer.push_back(oligoRC);\r
- }\r
- else if(type == "BARCODE"){\r
- inOligos >> group;\r
- \r
- \r
- //check for repeat barcodes\r
- map<string, int>::iterator itBar = barcodes.find(oligo);\r
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
- \r
- barcodes[oligo]=indexBarcode; indexBarcode++;\r
- barcodeNameVector.push_back(group);\r
- }else if(type == "LINKER"){\r
- linker.push_back(oligo);\r
- }else if(type == "SPACER"){\r
- spacer.push_back(oligo);\r
- }\r
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
- }\r
- m->gobble(inOligos);\r
- }\r
- inOligos.close();\r
- \r
- if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; }\r
- \r
- //add in potential combos\r
- if(barcodeNameVector.size() == 0){\r
- barcodes[""] = 0;\r
- barcodeNameVector.push_back("");\r
- }\r
- \r
- if(primerNameVector.size() == 0){\r
- primers[""] = 0;\r
- primerNameVector.push_back("");\r
- }\r
- \r
- filehandles.resize(barcodeNameVector.size());\r
+ if (m->control_pressed) { return false; } //error in reading oligos\r
+ \r
+ if (oligosObject->hasPairedBarcodes()) {\r
+ pairedOligos = true;\r
+ m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true;\r
+ }else {\r
+ pairedOligos = false;\r
+ numFPrimers = oligosObject->getPrimers().size();\r
+ numBarcodes = oligosObject->getBarcodes().size();\r
+ }\r
+ \r
+ numLinkers = oligosObject->getLinkers().size();\r
+ numSpacers = oligosObject->getSpacers().size();\r
+ numRPrimers = oligosObject->getReversePrimers().size();\r
+ \r
+ vector<string> groupNames = oligosObject->getGroupNames();\r
+ if (groupNames.size() == 0) { allBlank = true; }\r
+ \r
+ filehandles.resize(oligosObject->getBarcodeNames().size());\r
for(int i=0;i<filehandles.size();i++){\r
- filehandles[i].assign(primerNameVector.size(), "");\r
+ for(int j=0;j<oligosObject->getPrimerNames().size();j++){ filehandles[i].push_back(""); }\r
}\r
\r
- if(split > 1){\r
- set<string> uniqueNames; //used to cleanup outputFileNames\r
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
- \r
- string primerName = primerNameVector[itPrimer->second];\r
- string barcodeName = barcodeNameVector[itBar->second];\r
- \r
+ if(split > 1){\r
+ set<string> uniqueNames; //used to cleanup outputFileNames\r
+ map<string, int> barcodes = oligosObject->getBarcodes() ;\r
+ map<string, int> primers = oligosObject->getPrimers();\r
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+ \r
+ string primerName = oligosObject->getPrimerName(itPrimer->second);\r
+ string barcodeName = oligosObject->getBarcodeName(itBar->second);\r
+ \r
if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing\r
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing\r
else {\r
string comboGroupName = "";\r
string fastaFileName = "";\r
string qualFileName = "";\r
string nameFileName = "";\r
+ string countFileName = "";\r
\r
if(primerName == ""){\r
- comboGroupName = barcodeNameVector[itBar->second];\r
- }\r
- else{\r
+ comboGroupName = barcodeName;\r
+ }else{\r
if(barcodeName == ""){\r
- comboGroupName = primerNameVector[itPrimer->second];\r
+ comboGroupName = primerName;\r
}\r
else{\r
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
+ comboGroupName = barcodeName + "." + primerName;\r
}\r
}\r
\r
filehandles[itBar->second][itPrimer->second] = thisFilename;\r
temp.open(thisFilename.c_str(), ios::binary); temp.close();\r
}\r
- }\r
- }\r
- }\r
- numFPrimers = primers.size();\r
- numLinkers = linker.size();\r
- numSpacers = spacer.size();\r
+ }\r
+ }\r
+ }\r
+ \r
map<string, string> variables;\r
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
variables["[group]"] = "scrap";\r
noMatchFile = getOutputFileName("sff",variables);\r
m->mothurRemove(noMatchFile);\r
numNoMatch = 0;\r
- \r
- \r
- bool allBlank = true;\r
- for (int i = 0; i < barcodeNameVector.size(); i++) {\r
- if (barcodeNameVector[i] != "") {\r
- allBlank = false;\r
- break;\r
- }\r
- }\r
- for (int i = 0; i < primerNameVector.size(); i++) {\r
- if (primerNameVector[i] != "") {\r
- allBlank = false;\r
- break;\r
- }\r
- }\r
\r
filehandlesHeaders.resize(filehandles.size());\r
numSplitReads.resize(filehandles.size());\r
filehandles.clear();\r
numSplitReads.clear();\r
filehandlesHeaders.clear();\r
- barcodes.clear();\r
\r
groupMap = new GroupMap();\r
groupMap->readMap(oligoFile);\r
ofstream temp;\r
m->openOutputFileBinary(thisFilename, temp); temp.close();\r
filehandles[i].push_back(thisFilename);\r
- barcodes[groups[i]] = i;\r
}\r
}\r
\r