vector<string> SffInfoCommand::setParameters(){ \r
try { \r
CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
- CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);\r
+ CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);\r
+ CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
try {\r
string helpString = "";\r
helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
- helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+ helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n";\r
helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n";\r
helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n";\r
helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n";\r
helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
+ helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n";\r
helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
+ helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";\r
helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n";\r
helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n";\r
helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
SffInfoCommand::SffInfoCommand(string option) {\r
try {\r
abort = false; calledHelp = false; \r
- hasAccnos = false; hasOligos = false;\r
+ hasAccnos = false; hasOligos = false; hasGroup = false;\r
split = 1;\r
\r
//allow user to run help\r
bool ignore = false;\r
if (oligosFileNames[i] == "current") { \r
oligosFileNames[i] = m->getOligosFile(); \r
- if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
+ if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
else { \r
m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
//erase from file list\r
//make sure there is at least one valid file left\r
if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
}\r
+ \r
+ groupfile = validParameter.validFile(parameters, "group", false);\r
+ if (groupfile == "not found") { groupfile = ""; }\r
+ else {\r
+ hasGroup = true;\r
+ m->splitAtDash(groupfile, groupFileNames);\r
+ \r
+ //go through files and make sure they are good, if not, then disregard them\r
+ for (int i = 0; i < groupFileNames.size(); i++) {\r
+ bool ignore = false;\r
+ if (groupFileNames[i] == "current") {\r
+ groupFileNames[i] = m->getGroupFile();\r
+ if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
+ else {\r
+ m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
+ //erase from file list\r
+ groupFileNames.erase(groupFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ \r
+ if (!ignore) {\r
+ \r
+ if (inputDir != "") {\r
+ string path = m->hasPath(groupFileNames[i]);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }\r
+ }\r
+ \r
+ ifstream in;\r
+ int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
+ \r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getDefaultPath() != "") { //default path is set\r
+ string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ groupFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getOutputDir() != "") { //default path is set\r
+ string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ groupFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ in.close();\r
+ \r
+ if (ableToOpen == 1) {\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+ //erase from file list\r
+ groupFileNames.erase(groupFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ }\r
+ \r
+ //make sure there is at least one valid file left\r
+ if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
+ }\r
\r
- if (hasOligos) {\r
+ if (hasGroup) {\r
+ split = 2;\r
+ if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ }\r
+ \r
+ if (hasOligos) {\r
split = 2;\r
- if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
}\r
\r
+ if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
+ \r
if (hasAccnos) {\r
if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
}\r
else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }\r
}\r
\r
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }\r
+ reorient = m->isTrue(temp);\r
\r
}\r
}\r
\r
string oligos = "";\r
if (hasOligos) { oligos = oligosFileNames[s]; }\r
- \r
+ if (hasGroup) { oligos = groupFileNames[s]; }\r
+ \r
int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
\r
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
//**********************************************************************************************************************\r
int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
try {\r
+ oligosObject = new Oligos();\r
currentFileName = input;\r
if (outputDir == "") { outputDir += m->hasPath(input); }\r
\r
if (accnos != "") { readAccnosFile(accnos); }\r
else { seqNames.clear(); }\r
- \r
- if (oligos != "") { readOligos(oligos); split = 2; }\r
-\r
+ \r
+ TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;\r
+ if (hasOligos) {\r
+ readOligos(oligos); split = 2;\r
+ if (m->control_pressed) { delete oligosObject; return 0; }\r
+ trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers()); numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size();\r
+ if (reorient) {\r
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size();\r
+ }\r
+ }\r
+ if (hasGroup) { readGroup(oligos); split = 2; }\r
+ \r
ofstream outSfftxt, outFasta, outQual, outFlow;\r
string outFastaFileName, outQualFileName;\r
string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
int count = 0;\r
\r
//check magic number and version\r
- if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
- if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
+ if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; }\r
+ if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; }\r
\r
//print common header\r
if (sfftxt) { printCommonHeader(outSfftxt, header); }\r
if (flow) { outFlow << header.numFlowsPerRead << endl; }\r
- \r
+ \r
//read through the sff file\r
while (!in.eof()) {\r
\r
\r
//read data\r
seqRead read; Header readheader;\r
- readSeqData(in, read, header.numFlowsPerRead, readheader);\r
+ readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos);\r
\r
bool okay = sanityCheck(readheader, read);\r
if (!okay) { break; }\r
}\r
\r
count++;\r
- \r
+ \r
//report progress\r
if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }\r
\r
//create new common headers for each file with the correct number of reads\r
adjustCommonHeader(header);\r
\r
- //close files and delete ofstreams\r
- for(int i=0;i<filehandles.size();i++){\r
- for(int j=0;j<filehandles[0].size();j++){\r
- (filehandles[i][j].begin()->second)->close(); delete (filehandles[i][j].begin()->second);\r
- (filehandlesHeaders[i][j].begin()->second)->close(); delete (filehandlesHeaders[i][j].begin()->second);\r
- }\r
- }\r
+ if (hasGroup) { delete groupMap; }\r
\r
//cout << "here" << endl;\r
map<string, string>::iterator it;\r
for(int i=0;i<filehandles.size();i++){\r
for(int j=0;j<filehandles[0].size();j++){\r
//cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << endl;\r
- if (filehandles[i][j].begin()->first != "") {\r
- if (namesToRemove.count(filehandles[i][j].begin()->first) == 0) {\r
- if(m->isBlank(filehandles[i][j].begin()->first)){\r
+ if (filehandles[i][j] != "") {\r
+ if (namesToRemove.count(filehandles[i][j]) == 0) {\r
+ if(m->isBlank(filehandles[i][j])){\r
//cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
- m->mothurRemove(filehandles[i][j].begin()->first);\r
- m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
- namesToRemove.insert(filehandles[i][j].begin()->first);\r
+ m->mothurRemove(filehandles[i][j]);\r
+ m->mothurRemove(filehandlesHeaders[i][j]);\r
+ namesToRemove.insert(filehandles[i][j]);\r
}\r
}\r
}\r
//append new header to reads\r
for (int i = 0; i < filehandles.size(); i++) {\r
for (int j = 0; j < filehandles[i].size(); j++) {\r
- m->appendBinaryFiles(filehandles[i][j].begin()->first, filehandlesHeaders[i][j].begin()->first);\r
- m->renameFile(filehandlesHeaders[i][j].begin()->first, filehandles[i][j].begin()->first);\r
- m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
- //cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
- if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j].begin()->first); }\r
+ if (filehandles[i][j] != "") {\r
+ m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
+ m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
+ m->mothurRemove(filehandlesHeaders[i][j]);\r
+ //cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
+ if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
+ }\r
}\r
}\r
//cout << "here3" << endl;\r
else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
}\r
\r
+ delete oligosObject;\r
+ if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } }\r
+ \r
return count;\r
}\r
catch(exception& e) {\r
in.read(mybuffer,4);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,4);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
thisbuffer[7] = offset & 0xFF;\r
for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 8);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(thisbuffer, 8);\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(thisbuffer, 8);\r
thisbuffer2[3] = offset & 0xFF;\r
for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer2, 4);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(thisbuffer2, 4);\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(thisbuffer2, 4);\r
thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
}\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 4);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(thisbuffer, 4);\r
+ out.close();\r
delete[] thisbuffer;\r
}\r
}\r
in.read(mybuffer,2);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,2);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,2);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,1);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,header.numFlowsPerRead);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,header.keyLength);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
mybuffer = new char[spot-spotInFile];\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, spot-spotInFile);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, spot-spotInFile);\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
}\r
}\r
//**********************************************************************************************************************\r
-bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
+bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){\r
try {\r
unsigned long long startSpotInFile = in.tellg();\r
if (!in.eof()) {\r
\r
if (split > 1) { \r
\r
- int barcodeIndex, primerIndex;\r
- int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);\r
- \r
+ int barcodeIndex, primerIndex, trashCodeLength;\r
+ \r
+ if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos); }\r
+ else if (hasGroup) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode"); }\r
+ else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
+\r
char * mybuffer;\r
mybuffer = new char [spot-startSpotInFile];\r
\r
\r
\r
if(trashCodeLength == 0){\r
- (*(filehandles[barcodeIndex][primerIndex].begin()->second)).write(mybuffer, in2.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
+ out.write(mybuffer, in2.gcount());\r
+ out.close();\r
numSplitReads[barcodeIndex][primerIndex]++;\r
}\r
else{\r
}\r
}\r
//**********************************************************************************************************************\r
-int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) {\r
try {\r
- //find group read belongs to\r
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
\r
int success = 1;\r
string trashCode = "";\r
Sequence currSeq(header.name, seq);\r
QualityScores currQual;\r
\r
+ //for reorient\r
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());\r
+ QualityScores savedQual(currQual.getName(), currQual.getScores());\r
+ \r
if(numLinkers != 0){\r
- success = trimOligos.stripLinker(currSeq, currQual);\r
+ success = trimOligos->stripLinker(currSeq, currQual);\r
if(success > ldiffs) { trashCode += 'k'; }\r
else{ currentSeqsDiffs += success; }\r
\r
}\r
\r
- if(barcodes.size() != 0){\r
- success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
+ if(numBarcodes != 0){\r
+ success = trimOligos->stripBarcode(currSeq, currQual, barcode);\r
if(success > bdiffs) { trashCode += 'b'; }\r
else{ currentSeqsDiffs += success; }\r
}\r
\r
if(numSpacers != 0){\r
- success = trimOligos.stripSpacer(currSeq, currQual);\r
+ success = trimOligos->stripSpacer(currSeq, currQual);\r
if(success > sdiffs) { trashCode += 's'; }\r
else{ currentSeqsDiffs += success; }\r
\r
}\r
\r
if(numFPrimers != 0){\r
- success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
+ success = trimOligos->stripForward(currSeq, currQual, primer, true);\r
if(success > pdiffs) { trashCode += 'f'; }\r
else{ currentSeqsDiffs += success; }\r
}\r
\r
if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }\r
\r
- if(revPrimer.size() != 0){\r
- success = trimOligos.stripReverse(currSeq, currQual);\r
+ if(numRPrimers != 0){\r
+ success = trimOligos->stripReverse(currSeq, currQual);\r
if(!success) { trashCode += 'r'; }\r
}\r
\r
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse\r
+ int thisSuccess = 0;\r
+ string thisTrashCode = "";\r
+ int thisCurrentSeqsDiffs = 0;\r
+ \r
+ int thisBarcodeIndex = 0;\r
+ int thisPrimerIndex = 0;\r
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+ if(numBarcodes != 0){\r
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);\r
+ if(thisSuccess > bdiffs) { thisTrashCode += "b"; }\r
+ else{ thisCurrentSeqsDiffs += thisSuccess; }\r
+ }\r
+ //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+ if(numFPrimers != 0){\r
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);\r
+ if(thisSuccess > pdiffs) { thisTrashCode += "f"; }\r
+ else{ thisCurrentSeqsDiffs += thisSuccess; }\r
+ }\r
+ \r
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }\r
+ \r
+ if (thisTrashCode == "") {\r
+ trashCode = thisTrashCode;\r
+ success = thisSuccess;\r
+ currentSeqsDiffs = thisCurrentSeqsDiffs;\r
+ barcode = thisBarcodeIndex;\r
+ primer = thisPrimerIndex;\r
+ savedSeq.reverseComplement();\r
+ currSeq.setAligned(savedSeq.getAligned());\r
+ savedQual.flipQScores();\r
+ currQual.setScores(savedQual.getScores());\r
+ }else { trashCode += "(" + thisTrashCode + ")"; }\r
+ }\r
+\r
+ if (trashCode.length() == 0) { //is this sequence in the ignore group\r
+ string thisGroup = oligosObject->getGroupName(barcode, primer);\r
+ \r
+ int pos = thisGroup.find("ignore");\r
+ if (pos != string::npos) { trashCode += "i"; }\r
+ }\r
\r
return trashCode.length();\r
}\r
m->errorOut(e, "SffInfoCommand", "findGroup");\r
exit(1);\r
}\r
-} \r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) { // group-file overload: looks the read's name up in groupMap and returns the length of the resulting trash code
+ try {
+ string trashCode = ""; // one character is appended per failure reason; empty means the read is accepted
+ primer = 0; // this overload always reports primer index 0
+ // NOTE(review): barcode is never assigned in this overload; readSeqData passes an uninitialized barcodeIndex and then indexes filehandles[barcodeIndex][primerIndex] when 0 is returned - confirm the index is set elsewhere or assign it here
+ string group = groupMap->getGroup(header.name);
+ if (group == "not found") { trashCode += "g"; } //scrap for group
+ // read and groupMode are not used in this body; groupMode presumably exists only to distinguish this overload from the TrimOligos one - TODO confirm
+ return trashCode.length(); // 0 when the read name has a group, 1 (the single "g") when it does not
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "findGroup");
+ exit(1);
+ }
+}
//**********************************************************************************************************************\r
int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
try {\r
numSplitReads.clear();\r
filehandlesHeaders.clear();\r
\r
- ifstream inOligos;\r
- m->openInputFile(oligoFile, inOligos);\r
- \r
- string type, oligo, group;\r
+ bool allBlank = false;\r
+ oligosObject->read(oligoFile);\r
\r
- int indexPrimer = 0;\r
- int indexBarcode = 0;\r
- \r
- while(!inOligos.eof()){\r
- \r
- inOligos >> type; \r
- \r
- if(type[0] == '#'){\r
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there\r
- m->gobble(inOligos);\r
- }\r
- else{\r
- m->gobble(inOligos);\r
- //make type case insensitive\r
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }\r
- \r
- inOligos >> oligo;\r
- \r
- for(int i=0;i<oligo.length();i++){\r
- oligo[i] = toupper(oligo[i]);\r
- if(oligo[i] == 'U') { oligo[i] = 'T'; }\r
- }\r
- \r
- if(type == "FORWARD"){\r
- group = "";\r
- \r
- // get rest of line in case there is a primer name\r
- while (!inOligos.eof()) { \r
- char c = inOligos.get(); \r
- if (c == 10 || c == 13 || c == -1){ break; }\r
- else if (c == 32 || c == 9){;} //space or tab\r
- else { group += c; }\r
- } \r
- \r
- //check for repeat barcodes\r
- map<string, int>::iterator itPrime = primers.find(oligo);\r
- if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
- \r
- primers[oligo]=indexPrimer; indexPrimer++; \r
- primerNameVector.push_back(group);\r
- }else if(type == "REVERSE"){\r
- //Sequence oligoRC("reverse", oligo);\r
- //oligoRC.reverseComplement();\r
- string oligoRC = reverseOligo(oligo);\r
- revPrimer.push_back(oligoRC);\r
- }\r
- else if(type == "BARCODE"){\r
- inOligos >> group;\r
- \r
- //check for repeat barcodes\r
- map<string, int>::iterator itBar = barcodes.find(oligo);\r
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
- \r
- barcodes[oligo]=indexBarcode; indexBarcode++;\r
- barcodeNameVector.push_back(group);\r
- }else if(type == "LINKER"){\r
- linker.push_back(oligo);\r
- }else if(type == "SPACER"){\r
- spacer.push_back(oligo);\r
- }\r
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
- }\r
- m->gobble(inOligos);\r
- } \r
- inOligos.close();\r
- \r
- if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; }\r
- \r
- //add in potential combos\r
- if(barcodeNameVector.size() == 0){\r
- barcodes[""] = 0;\r
- barcodeNameVector.push_back(""); \r
- }\r
- \r
- if(primerNameVector.size() == 0){\r
- primers[""] = 0;\r
- primerNameVector.push_back(""); \r
+ if (m->control_pressed) { return false; } //error in reading oligos\r
+ \r
+ if (oligosObject->hasPairedBarcodes()) {\r
+ pairedOligos = true;\r
+ m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true;\r
+ }else {\r
+ pairedOligos = false;\r
+ numFPrimers = oligosObject->getPrimers().size();\r
+ numBarcodes = oligosObject->getBarcodes().size();\r
+ }\r
+ \r
+ numLinkers = oligosObject->getLinkers().size();\r
+ numSpacers = oligosObject->getSpacers().size();\r
+ numRPrimers = oligosObject->getReversePrimers().size();\r
+ \r
+ vector<string> groupNames = oligosObject->getGroupNames();\r
+ if (groupNames.size() == 0) { allBlank = true; }\r
+ \r
+ filehandles.resize(oligosObject->getBarcodeNames().size());\r
+ for(int i=0;i<filehandles.size();i++){\r
+ for(int j=0;j<oligosObject->getPrimerNames().size();j++){ filehandles[i].push_back(""); }\r
}\r
- \r
- filehandles.resize(barcodeNameVector.size());\r
- for (int i = 0; i < filehandles.size(); i++) {\r
- for (int j = 0; j < primerNameVector.size(); j++) {\r
- ofstream* temp;\r
- map<string, ofstream*> myMap; myMap[""] = temp;\r
- filehandles[i].push_back(myMap);\r
+ \r
+ if(split > 1){\r
+ set<string> uniqueNames; //used to cleanup outputFileNames\r
+ map<string, int> barcodes = oligosObject->getBarcodes() ;\r
+ map<string, int> primers = oligosObject->getPrimers();\r
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+ \r
+ string primerName = oligosObject->getPrimerName(itPrimer->second);\r
+ string barcodeName = oligosObject->getBarcodeName(itBar->second);\r
+ \r
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing\r
+ else if ((primerName == "") && (barcodeName == "")) { } //do nothing\r
+ else {\r
+ string comboGroupName = "";\r
+ string fastaFileName = "";\r
+ string qualFileName = "";\r
+ string nameFileName = "";\r
+ string countFileName = "";\r
+ \r
+ if(primerName == ""){\r
+ comboGroupName = barcodeName;\r
+ }else{\r
+ if(barcodeName == ""){\r
+ comboGroupName = primerName;\r
+ }\r
+ else{\r
+ comboGroupName = barcodeName + "." + primerName;\r
+ }\r
+ }\r
+ \r
+ ofstream temp;\r
+ map<string, string> variables;\r
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+ variables["[group]"] = comboGroupName;\r
+ string thisFilename = getOutputFileName("sff",variables);\r
+ if (uniqueNames.count(thisFilename) == 0) {\r
+ outputNames.push_back(thisFilename);\r
+ outputTypes["sff"].push_back(thisFilename);\r
+ uniqueNames.insert(thisFilename);\r
+ }\r
+ \r
+ filehandles[itBar->second][itPrimer->second] = thisFilename;\r
+ temp.open(thisFilename.c_str(), ios::binary); temp.close();\r
+ }\r
+ }\r
}\r
}\r
- \r
- if(split > 1){\r
- set<string> uniqueNames; //used to cleanup outputFileNames\r
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
- \r
- string primerName = primerNameVector[itPrimer->second];\r
- string barcodeName = barcodeNameVector[itBar->second];\r
- \r
- string comboGroupName = "";\r
- string fastaFileName = "";\r
- string qualFileName = "";\r
- string nameFileName = "";\r
- \r
- if(primerName == ""){\r
- comboGroupName = barcodeNameVector[itBar->second];\r
- }\r
- else{\r
- if(barcodeName == ""){\r
- comboGroupName = primerNameVector[itPrimer->second];\r
- }\r
- else{\r
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
- }\r
- }\r
- \r
- ofstream* temp = new ofstream;\r
- map<string, string> variables; \r
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
- variables["[group]"] = comboGroupName;\r
- string thisFilename = getOutputFileName("sff",variables);\r
- if (uniqueNames.count(thisFilename) == 0) {\r
- outputNames.push_back(thisFilename);\r
- outputTypes["sff"].push_back(thisFilename);\r
- uniqueNames.insert(thisFilename);\r
- }\r
- \r
- map<string, ofstream*> myMap; myMap[thisFilename] = temp;\r
- m->openOutputFileBinary(thisFilename, *(temp));\r
- filehandles[itBar->second][itPrimer->second] = myMap;\r
- map<string, ofstream*>::iterator itOfstream = filehandles[itBar->second][itPrimer->second].find("");\r
- if (itOfstream != filehandles[itBar->second][itPrimer->second].end()) { filehandles[itBar->second][itPrimer->second].erase(itOfstream); } //remove blank entry so we dont mess with .begin() above. code above assumes only 1 file name in the map\r
- }\r
- }\r
- }\r
- numFPrimers = primers.size();\r
- numLinkers = linker.size();\r
- numSpacers = spacer.size();\r
- map<string, string> variables; \r
+ \r
+ map<string, string> variables;\r
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
variables["[group]"] = "scrap";\r
noMatchFile = getOutputFileName("sff",variables);\r
m->mothurRemove(noMatchFile);\r
numNoMatch = 0;\r
- \r
- bool allBlank = true;\r
- for (int i = 0; i < barcodeNameVector.size(); i++) {\r
- if (barcodeNameVector[i] != "") {\r
- allBlank = false;\r
- break;\r
- }\r
- }\r
- for (int i = 0; i < primerNameVector.size(); i++) {\r
- if (primerNameVector[i] != "") {\r
- allBlank = false;\r
- break;\r
- }\r
- }\r
\r
filehandlesHeaders.resize(filehandles.size());\r
numSplitReads.resize(filehandles.size());\r
- for (int i = 0; i < filehandles.size(); i++) { \r
- numSplitReads[i].resize(filehandles[i].size(), 0); \r
+ for (int i = 0; i < filehandles.size(); i++) {\r
+ numSplitReads[i].resize(filehandles[i].size(), 0);\r
for (int j = 0; j < filehandles[i].size(); j++) {\r
- ofstream* temp = new ofstream;\r
- map<string, ofstream* > myMap;\r
- string thisHeader = (filehandles[i][j].begin())->first+"headers";\r
- myMap[thisHeader] = temp;\r
- m->openOutputFileBinary(thisHeader, *(temp));\r
- filehandlesHeaders[i].push_back(myMap);\r
+ filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
}\r
}\r
\r
exit(1);\r
}\r
}\r
+//***************************************************************************************************************\r
+\r
+//Builds the output-file bookkeeping for every group named in a group file.\r
+//oligoFile is the path handed to GroupMap::readMap. Always returns true; a caught exception goes to m->errorOut and then exit(1).\r
+bool SffInfoCommand::readGroup(string oligoFile){\r
+    try {\r
+        //drop whatever an earlier call left behind\r
+        filehandles.clear();\r
+        numSplitReads.clear();\r
+        filehandlesHeaders.clear();\r
+ \r
+        groupMap = new GroupMap(); //NOTE(review): nothing here frees a GroupMap assigned earlier - confirm who owns it\r
+        groupMap->readMap(oligoFile);\r
+        vector<string> groupNames = groupMap->getNamesOfGroups(); //used like barcodeNameVector - there are no primer names\r
+ \r
+        //one sff file name per group; each file is opened and closed immediately, only the name is kept\r
+        filehandles.resize(groupNames.size());\r
+        for (size_t g = 0; g < filehandles.size(); ++g) {\r
+            map<string, string> nameParts;\r
+            nameParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+            nameParts["[group]"] = groupNames[g];\r
+            string groupFile = getOutputFileName("sff", nameParts);\r
+            outputNames.push_back(groupFile);\r
+            outputTypes["sff"].push_back(groupFile);\r
+            ofstream creator;\r
+            m->openOutputFileBinary(groupFile, creator);\r
+            creator.close();\r
+            filehandles[g].push_back(groupFile);\r
+        }\r
+ \r
+        //scrap file: named with group "scrap", passed to mothurRemove (presumably deleting a leftover copy), numNoMatch set to 0\r
+        map<string, string> scrapParts;\r
+        scrapParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+        scrapParts["[group]"] = "scrap";\r
+        noMatchFile = getOutputFileName("sff", scrapParts);\r
+        m->mothurRemove(noMatchFile);\r
+        numNoMatch = 0;\r
+ \r
+        //each group file gets a companion whose name is the file name + "headers", plus a zero-filled numSplitReads slot\r
+        filehandlesHeaders.resize(groupNames.size());\r
+        numSplitReads.resize(filehandles.size());\r
+        for (size_t g = 0; g < filehandles.size(); ++g) {\r
+            numSplitReads[g].resize(filehandles[g].size(), 0);\r
+            for (size_t f = 0; f < filehandles[g].size(); ++f) {\r
+                string headerFile = filehandles[g][f] + "headers";\r
+                ofstream creator;\r
+                m->openOutputFileBinary(headerFile, creator);\r
+                creator.close();\r
+                filehandlesHeaders[g].push_back(headerFile);\r
+            }\r
+        }\r
+ \r
+        return true;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readGroup");\r
+        exit(1);\r
+    }\r
+}\r
+\r
//********************************************************************/\r
string SffInfoCommand::reverseOligo(string oligo){\r
try {\r