X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=dc8eb54abb749bffe4d0ffab55e53331f1933b32;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hp=bccb75122ffbc9cebc5862f00df0b17d9e644f51;hpb=ac03f1f6c27b5bfdf2cfb6d45c3667c3e0281f51;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index bccb751..dc8eb54 100755 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -17,7 +17,9 @@ vector SffInfoCommand::setParameters(){ try { CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos); + CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); + CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt); CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow); @@ -46,16 +48,18 @@ string SffInfoCommand::getHelpString(){ try { string helpString = ""; helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n"; - helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n"; + helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n"; helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"; helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"; helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"; helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n"; + helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"; helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"; helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"; @@ -112,7 +116,7 @@ SffInfoCommand::SffInfoCommand(){ SffInfoCommand::SffInfoCommand(string option) { try { abort = false; calledHelp = false; - hasAccnos = false; hasOligos = false; + hasAccnos = false; hasOligos = false; hasGroup = false; split = 1; //allow user to run help @@ -293,7 +297,7 @@ SffInfoCommand::SffInfoCommand(string option) { bool ignore = false; if (oligosFileNames[i] == "current") { oligosFileNames[i] = m->getOligosFile(); - if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); } + if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; //erase from file list @@ -349,12 +353,87 @@ SffInfoCommand::SffInfoCommand(string option) { //make sure there is at least one valid file left if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; } } + + groupfile = validParameter.validFile(parameters, "group", false); + if (groupfile == "not found") { groupfile = ""; } + else { + hasGroup = true; + m->splitAtDash(groupfile, groupFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < groupFileNames.size(); i++) { + bool ignore = false; + if (groupFileNames[i] == "current") { + groupFileNames[i] = m->getGroupFile(); + if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + groupFileNames.erase(groupFileNames.begin()+i); + i--; + } + } + + if (!ignore) { + + if (inputDir != "") { + string path = m->hasPath(groupFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; } + } + + ifstream in; + int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]); + m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + groupFileNames[i] = tryPath; + } + } + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]); + m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + groupFileNames[i] = tryPath; + } + } + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + groupFileNames.erase(groupFileNames.begin()+i); + i--; + } + } + } + + //make sure there is at least one valid file left + if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; } + } - if (hasOligos) { + if (hasGroup) { + split = 2; + if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); } + } + + if (hasOligos) { split = 2; - if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); } + if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); } } + if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;} + if (hasAccnos) { if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); } } @@ -415,6 +494,8 @@ SffInfoCommand::SffInfoCommand(string option) { else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; } } + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); } } @@ -442,7 +523,8 @@ int SffInfoCommand::execute(){ string oligos = ""; if (hasOligos) { oligos = oligosFileNames[s]; } - + if (hasGroup) { oligos = groupFileNames[s]; } + int numReads = extractSffInfo(filenames[s], accnos, oligos); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); @@ -485,14 +567,24 @@ int SffInfoCommand::execute(){ //********************************************************************************************************************** int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ try { + oligosObject = new Oligos(); currentFileName = input; if (outputDir == "") { outputDir += m->hasPath(input); } if (accnos != "") { readAccnosFile(accnos); } else { seqNames.clear(); } - - if (oligos != "") { readOligos(oligos); split = 2; } - + + TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL; + if (hasOligos) { + readOligos(oligos); split = 2; + if (m->control_pressed) { delete oligosObject; return 0; } + trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers()); numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size(); + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size(); + } + } + if (hasGroup) { readGroup(oligos); split = 2; } + ofstream outSfftxt, outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; string rootName = outputDir + m->getRootName(m->getSimpleName(input)); @@ -520,13 +612,13 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ int count = 0; //check magic number and version - if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } - if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } + if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; } + if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return count; } //print common header if (sfftxt) { printCommonHeader(outSfftxt, header); } if (flow) { outFlow << header.numFlowsPerRead << endl; } - + //read through the sff file while (!in.eof()) { @@ -534,7 +626,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ //read data seqRead read; Header readheader; - readSeqData(in, read, header.numFlowsPerRead, readheader); + readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos); bool okay = sanityCheck(readheader, read); if (!okay) { break; } @@ -551,7 +643,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ } count++; - + //report progress if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } @@ -574,13 +666,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ //create new common headers for each file with the correct number of reads adjustCommonHeader(header); - //close files and delete ofstreams - for(int i=0;isecond)->close(); delete (filehandles[i][j].begin()->second); - (filehandlesHeaders[i][j].begin()->second)->close(); delete (filehandlesHeaders[i][j].begin()->second); - } - } + if (hasGroup) { delete groupMap; } //cout << "here" << endl; map::iterator it; @@ -588,13 +674,13 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ for(int i=0;ifirst != "") { - if (namesToRemove.count(filehandles[i][j].begin()->first) == 0) { - if(m->isBlank(filehandles[i][j].begin()->first)){ + if (filehandles[i][j] != "") { + if (namesToRemove.count(filehandles[i][j]) == 0) { + if(m->isBlank(filehandles[i][j])){ //cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " is blank removing" << endl; - m->mothurRemove(filehandles[i][j].begin()->first); - m->mothurRemove(filehandlesHeaders[i][j].begin()->first); - namesToRemove.insert(filehandles[i][j].begin()->first); + m->mothurRemove(filehandles[i][j]); + m->mothurRemove(filehandlesHeaders[i][j]); + namesToRemove.insert(filehandles[i][j]); } } } @@ -604,11 +690,13 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ //append new header to reads for (int i = 0; i < filehandles.size(); i++) { for (int j = 0; j < filehandles[i].size(); j++) { - m->appendBinaryFiles(filehandles[i][j].begin()->first, filehandlesHeaders[i][j].begin()->first); - m->renameFile(filehandlesHeaders[i][j].begin()->first, filehandles[i][j].begin()->first); - m->mothurRemove(filehandlesHeaders[i][j].begin()->first); - //cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl; - if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j].begin()->first); } + if (filehandles[i][j] != "") { + m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]); + m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]); + m->mothurRemove(filehandlesHeaders[i][j]); + //cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl; + if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); } + } } } //cout << "here3" << endl; @@ -625,6 +713,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); } } + delete oligosObject; + if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } + return count; } catch(exception& e) { @@ -733,7 +824,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,4); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -744,7 +838,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,4); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -765,7 +862,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ thisbuffer[7] = offset & 0xFF; for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 8); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(thisbuffer, 8); + out.close(); } } outNoMatchHeader.write(thisbuffer, 8); @@ -784,7 +884,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ thisbuffer2[3] = offset & 0xFF; for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer2, 4); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(thisbuffer2, 4); + out.close(); } } outNoMatchHeader.write(thisbuffer2, 4); @@ -809,7 +912,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF; thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF; } - (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 4); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(thisbuffer, 4); + out.close(); delete[] thisbuffer; } } @@ -834,7 +940,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,2); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -845,7 +954,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,2); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -856,7 +968,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,2); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -867,7 +982,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,1); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -878,7 +996,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,header.numFlowsPerRead); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -889,7 +1010,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,header.keyLength); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, in.gcount()); + out.close(); } } outNoMatchHeader.write(mybuffer, in.gcount()); @@ -904,7 +1028,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ mybuffer = new char[spot-spotInFile]; for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { - (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, spot-spotInFile); + ofstream out; + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(mybuffer, spot-spotInFile); + out.close(); } } outNoMatchHeader.write(mybuffer, spot-spotInFile); @@ -925,7 +1052,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ } } //********************************************************************************************************************** -bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){ +bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){ try { unsigned long long startSpotInFile = in.tellg(); if (!in.eof()) { @@ -1031,9 +1158,12 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, if (split > 1) { - int barcodeIndex, primerIndex; - int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex); - + int barcodeIndex, primerIndex, trashCodeLength; + + if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos); } + else if (hasGroup) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode"); } + else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); } + char * mybuffer; mybuffer = new char [spot-startSpotInFile]; @@ -1044,7 +1174,10 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, if(trashCodeLength == 0){ - (*(filehandles[barcodeIndex][primerIndex].begin()->second)).write(mybuffer, in2.gcount()); + ofstream out; + m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out); + out.write(mybuffer, in2.gcount()); + out.close(); numSplitReads[barcodeIndex][primerIndex]++; } else{ @@ -1072,10 +1205,8 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, } } //********************************************************************************************************************** -int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) { +int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) { try { - //find group read belongs to - TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); int success = 1; string trashCode = ""; @@ -1112,39 +1243,84 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr Sequence currSeq(header.name, seq); QualityScores currQual; + //for reorient + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); + QualityScores savedQual(currQual.getName(), currQual.getScores()); + if(numLinkers != 0){ - success = trimOligos.stripLinker(currSeq, currQual); + success = trimOligos->stripLinker(currSeq, currQual); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } - if(barcodes.size() != 0){ - success = trimOligos.stripBarcode(currSeq, currQual, barcode); + if(numBarcodes != 0){ + success = trimOligos->stripBarcode(currSeq, currQual, barcode); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numSpacers != 0){ - success = trimOligos.stripSpacer(currSeq, currQual); + success = trimOligos->stripSpacer(currSeq, currQual); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } if(numFPrimers != 0){ - success = trimOligos.stripForward(currSeq, currQual, primer, true); + success = trimOligos->stripForward(currSeq, currQual, primer, true); if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } - if(revPrimer.size() != 0){ - success = trimOligos.stripReverse(currSeq, currQual); + if(numRPrimers != 0){ + success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true); + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcode = thisBarcodeIndex; + primer = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); + }else { trashCode += "(" + thisTrashCode + ")"; } + } + + if (trashCode.length() == 0) { //is this sequence in the ignore group + string thisGroup = oligosObject->getGroupName(barcode, primer); + + int pos = thisGroup.find("ignore"); + if (pos != string::npos) { trashCode += "i"; } + } return trashCode.length(); } @@ -1152,7 +1328,23 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr m->errorOut(e, "SffInfoCommand", "findGroup"); exit(1); } -} +} +//********************************************************************************************************************** +int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) { + try { + string trashCode = ""; + primer = 0; + + string group = groupMap->getGroup(header.name); + if (group == "not found") { trashCode += "g"; } //scrap for group + + return trashCode.length(); + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "findGroup"); + exit(1); + } +} //********************************************************************************************************************** int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) { try { @@ -1675,178 +1867,93 @@ bool SffInfoCommand::readOligos(string oligoFile){ numSplitReads.clear(); filehandlesHeaders.clear(); - ifstream inOligos; - m->openInputFile(oligoFile, inOligos); - - string type, oligo, group; + bool allBlank = false; + oligosObject->read(oligoFile); - int indexPrimer = 0; - int indexBarcode = 0; - - while(!inOligos.eof()){ - - inOligos >> type; - - if(type[0] == '#'){ - while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - m->gobble(inOligos); - } - else{ - m->gobble(inOligos); - //make type case insensitive - for(int i=0;i> oligo; - - for(int i=0;i::iterator itPrime = primers.find(oligo); - if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - primers[oligo]=indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - }else if(type == "REVERSE"){ - //Sequence oligoRC("reverse", oligo); - //oligoRC.reverseComplement(); - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - } - else if(type == "BARCODE"){ - inOligos >> group; - - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } - } - m->gobble(inOligos); - } - inOligos.close(); - - if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; } - - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodes[""] = 0; - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primers[""] = 0; - primerNameVector.push_back(""); + if (m->control_pressed) { return false; } //error in reading oligos + + if (oligosObject->hasPairedBarcodes()) { + pairedOligos = true; + m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true; + }else { + pairedOligos = false; + numFPrimers = oligosObject->getPrimers().size(); + numBarcodes = oligosObject->getBarcodes().size(); + } + + numLinkers = oligosObject->getLinkers().size(); + numSpacers = oligosObject->getSpacers().size(); + numRPrimers = oligosObject->getReversePrimers().size(); + + vector groupNames = oligosObject->getGroupNames(); + if (groupNames.size() == 0) { allBlank = true; } + + filehandles.resize(oligosObject->getBarcodeNames().size()); + for(int i=0;igetPrimerNames().size();j++){ filehandles[i].push_back(""); } } - - filehandles.resize(barcodeNameVector.size()); - for (int i = 0; i < filehandles.size(); i++) { - for (int j = 0; j < primerNameVector.size(); j++) { - ofstream* temp; - map myMap; myMap[""] = temp; - filehandles[i].push_back(myMap); + + if(split > 1){ + set uniqueNames; //used to cleanup outputFileNames + map barcodes = oligosObject->getBarcodes() ; + map primers = oligosObject->getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligosObject->getPrimerName(itPrimer->second); + string barcodeName = oligosObject->getBarcodeName(itBar->second); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + string fastaFileName = ""; + string qualFileName = ""; + string nameFileName = ""; + string countFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); + variables["[group]"] = comboGroupName; + string thisFilename = getOutputFileName("sff",variables); + if (uniqueNames.count(thisFilename) == 0) { + outputNames.push_back(thisFilename); + outputTypes["sff"].push_back(thisFilename); + uniqueNames.insert(thisFilename); + } + + filehandles[itBar->second][itPrimer->second] = thisFilename; + temp.open(thisFilename.c_str(), ios::binary); temp.close(); + } + } } } - - if(split > 1){ - set uniqueNames; //used to cleanup outputFileNames - for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; - - string comboGroupName = ""; - string fastaFileName = ""; - string qualFileName = ""; - string nameFileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - } - - ofstream* temp = new ofstream; - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); - variables["[group]"] = comboGroupName; - string thisFilename = getOutputFileName("sff",variables); - if (uniqueNames.count(thisFilename) == 0) { - outputNames.push_back(thisFilename); - outputTypes["sff"].push_back(thisFilename); - uniqueNames.insert(thisFilename); - } - - map myMap; myMap[thisFilename] = temp; - m->openOutputFileBinary(thisFilename, *(temp)); - filehandles[itBar->second][itPrimer->second] = myMap; - map::iterator itOfstream = filehandles[itBar->second][itPrimer->second].find(""); - if (itOfstream != filehandles[itBar->second][itPrimer->second].end()) { filehandles[itBar->second][itPrimer->second].erase(itOfstream); } //remove blank entry so we dont mess with .begin() above. code above assumes only 1 file name in the map - } - } - } - numFPrimers = primers.size(); - numLinkers = linker.size(); - numSpacers = spacer.size(); - map variables; + + map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); variables["[group]"] = "scrap"; noMatchFile = getOutputFileName("sff",variables); m->mothurRemove(noMatchFile); numNoMatch = 0; - - bool allBlank = true; - for (int i = 0; i < barcodeNameVector.size(); i++) { - if (barcodeNameVector[i] != "") { - allBlank = false; - break; - } - } - for (int i = 0; i < primerNameVector.size(); i++) { - if (primerNameVector[i] != "") { - allBlank = false; - break; - } - } filehandlesHeaders.resize(filehandles.size()); numSplitReads.resize(filehandles.size()); - for (int i = 0; i < filehandles.size(); i++) { - numSplitReads[i].resize(filehandles[i].size(), 0); + for (int i = 0; i < filehandles.size(); i++) { + numSplitReads[i].resize(filehandles[i].size(), 0); for (int j = 0; j < filehandles[i].size(); j++) { - ofstream* temp = new ofstream; - map myMap; - string thisHeader = (filehandles[i][j].begin())->first+"headers"; - myMap[thisHeader] = temp; - m->openOutputFileBinary(thisHeader, *(temp)); - filehandlesHeaders[i].push_back(myMap); + filehandlesHeaders[i].push_back(filehandles[i][j]+"headers"); } } @@ -1864,6 +1971,66 @@ bool SffInfoCommand::readOligos(string oligoFile){ exit(1); } } +//*************************************************************************************************************** + +bool SffInfoCommand::readGroup(string oligoFile){ + try { + filehandles.clear(); + numSplitReads.clear(); + filehandlesHeaders.clear(); + + groupMap = new GroupMap(); + groupMap->readMap(oligoFile); + + //like barcodeNameVector - no primer names + vector groups = groupMap->getNamesOfGroups(); + + filehandles.resize(groups.size()); + for (int i = 0; i < filehandles.size(); i++) { + for (int j = 0; j < 1; j++) { + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); + variables["[group]"] = groups[i]; + string thisFilename = getOutputFileName("sff",variables); + outputNames.push_back(thisFilename); + outputTypes["sff"].push_back(thisFilename); + + ofstream temp; + m->openOutputFileBinary(thisFilename, temp); temp.close(); + filehandles[i].push_back(thisFilename); + } + } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName)); + variables["[group]"] = "scrap"; + noMatchFile = getOutputFileName("sff",variables); + m->mothurRemove(noMatchFile); + numNoMatch = 0; + + + filehandlesHeaders.resize(groups.size()); + numSplitReads.resize(filehandles.size()); + for (int i = 0; i < filehandles.size(); i++) { + numSplitReads[i].resize(filehandles[i].size(), 0); + for (int j = 0; j < filehandles[i].size(); j++) { + ofstream temp ; + string thisHeader = filehandles[i][j]+"headers"; + m->openOutputFileBinary(thisHeader, temp); temp.close(); + filehandlesHeaders[i].push_back(thisHeader); + } + } + + return true; + + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "readGroup"); + exit(1); + } +} + //********************************************************************/ string SffInfoCommand::reverseOligo(string oligo){ try {