X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=08cf21e5d6b543684cfebe56c0cdaf8697139125;hb=49d2b7459c5027557564b21e9487dadafbbbdc96;hp=1a9b73f18c5abd36a0bb6ca98ce9658bda37665a;hpb=d635b39347cd81943ea50de7b813a0a5d743b0c0;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 1a9b73f..08cf21e 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -11,53 +11,88 @@ #include "endiannessmacros.h" //********************************************************************************************************************** -vector SffInfoCommand::getValidParameters(){ - try { - string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); +vector SffInfoCommand::setParameters(){ + try { + CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff); + CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos); + CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt); + CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow); + CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim); + CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta); + CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "getValidParameters"); + m->errorOut(e, "SffInfoCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** -SffInfoCommand::SffInfoCommand(){ +string SffInfoCommand::getHelpString(){ try { - abort = true; - //initialize outputTypes - vector tempOutNames; - outputTypes["fasta"] = tempOutNames; - outputTypes["flow"] = tempOutNames; - outputTypes["sfftxt"] = tempOutNames; - outputTypes["qual"] = tempOutNames; + string helpString = ""; + helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n"; + helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"; + helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"; + helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"; + helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"; + helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"; + helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"; + helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"; + helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"; + helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"; + helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n"; + helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); + m->errorOut(e, "SffInfoCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** -vector SffInfoCommand::getRequiredParameters(){ +string SffInfoCommand::getOutputFileNameTag(string type, string inputName=""){ try { - string Array[] = {"sff"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "fasta"; } + else if (type == "flow") { outputFileName = "flow"; } + else if (type == "sfftxt") { outputFileName = "sff.txt"; } + else if (type == "qfile") { outputFileName = "qual"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; } catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "getRequiredParameters"); + m->errorOut(e, "SffInfoCommand", "getOutputFileNameTag"); exit(1); } } + + //********************************************************************************************************************** -vector SffInfoCommand::getRequiredFiles(){ +SffInfoCommand::SffInfoCommand(){ try { - vector myArray; - return myArray; + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["flow"] = tempOutNames; + outputTypes["sfftxt"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; } catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "getRequiredFiles"); + m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); exit(1); } } @@ -65,16 +100,16 @@ vector SffInfoCommand::getRequiredFiles(){ SffInfoCommand::SffInfoCommand(string option) { try { - abort = false; + abort = false; calledHelp = false; hasAccnos = false; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { //valid paramters for this command - string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); @@ -90,7 +125,7 @@ SffInfoCommand::SffInfoCommand(string option) { outputTypes["fasta"] = tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["sfftxt"] = tempOutNames; - outputTypes["qual"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -99,52 +134,66 @@ SffInfoCommand::SffInfoCommand(string option) { string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } sffFilename = validParameter.validFile(parameters, "sff", false); - if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } + if (sffFilename == "not found") { sffFilename = ""; } else { m->splitAtDash(sffFilename, filenames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < filenames.size(); i++) { - if (inputDir != "") { - string path = m->hasPath(filenames[i]); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { filenames[i] = inputDir + filenames[i]; } - } - - ifstream in; - int ableToOpen = m->openInputFile(filenames[i], in, "noerror"); - - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]); - m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - filenames[i] = tryPath; + bool ignore = false; + if (filenames[i] == "current") { + filenames[i] = m->getSFFFile(); + if (filenames[i] != "") { m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + filenames.erase(filenames.begin()+i); + i--; } } - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getOutputDir() != "") { //default path is set - string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]); - m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - filenames[i] = tryPath; + if (!ignore) { + if (inputDir != "") { + string path = m->hasPath(filenames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { filenames[i] = inputDir + filenames[i]; } } - } - - in.close(); + + ifstream in; + int ableToOpen = m->openInputFile(filenames[i], in, "noerror"); - if (ableToOpen == 1) { - m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine(); - //erase from file list - filenames.erase(filenames.begin()+i); - i--; + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + filenames[i] = tryPath; + } + } + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + filenames[i] = tryPath; + } + } + + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + filenames.erase(filenames.begin()+i); + i--; + }else { m->setSFFFile(filenames[i]); } } } @@ -160,44 +209,59 @@ SffInfoCommand::SffInfoCommand(string option) { //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < accnosFileNames.size(); i++) { - if (inputDir != "") { - string path = m->hasPath(accnosFileNames[i]); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } - } - - ifstream in; - int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror"); - - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]); - m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - accnosFileNames[i] = tryPath; + bool ignore = false; + if (accnosFileNames[i] == "current") { + accnosFileNames[i] = m->getAccnosFile(); + if (accnosFileNames[i] != "") { m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + accnosFileNames.erase(accnosFileNames.begin()+i); + i--; } } - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getOutputDir() != "") { //default path is set - string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]); - m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - accnosFileNames[i] = tryPath; + + if (!ignore) { + + if (inputDir != "") { + string path = m->hasPath(accnosFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } } - } - in.close(); + + ifstream in; + int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror"); - if (ableToOpen == 1) { - m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); - //erase from file list - accnosFileNames.erase(accnosFileNames.begin()+i); - i--; + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + accnosFileNames[i] = tryPath; + } + } + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + accnosFileNames[i] = tryPath; + } + } + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + accnosFileNames.erase(accnosFileNames.begin()+i); + i--; + } } } @@ -215,14 +279,38 @@ SffInfoCommand::SffInfoCommand(string option) { temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } fasta = m->isTrue(temp); - temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } + temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "T"; } flow = m->isTrue(temp); temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } trim = m->isTrue(temp); - temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } - sfftxt = m->isTrue(temp); + temp = validParameter.validFile(parameters, "sfftxt", false); + if (temp == "not found") { temp = "F"; sfftxt = false; sfftxtFilename = ""; } + else if (m->isTrue(temp)) { sfftxt = true; sfftxtFilename = ""; } + else { + //you are a filename + if (inputDir != "") { + map::iterator it = parameters.find("sfftxt"); + //user has given a template file + if(it != parameters.end()){ + string path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["sfftxt"] = inputDir + it->second; } + } + } + + sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true); + if (sfftxtFilename == "not found") { sfftxtFilename = ""; } + else if (sfftxtFilename == "not open") { sfftxtFilename = ""; } + } + + if ((sfftxtFilename == "") && (filenames.size() == 0)) { + //if there is a current sff file, use it + string filename = m->getSFFFile(); + if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; } + } } } catch(exception& e) { @@ -230,43 +318,18 @@ SffInfoCommand::SffInfoCommand(string option) { exit(1); } } -//********************************************************************************************************************** - -void SffInfoCommand::help(){ - try { - m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n"); - m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"); - m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"); - m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"); - m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"); - m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"); - m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"); - m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"); - m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"); - m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n"); - } - catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "help"); - exit(1); - } -} -//********************************************************************************************************************** - -SffInfoCommand::~SffInfoCommand(){} - //********************************************************************************************************************** int SffInfoCommand::execute(){ try { - - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } for (int s = 0; s < filenames.size(); s++) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } int start = time(NULL); + filenames[s] = m->getFullPathName(filenames[s]); m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine(); string accnos = ""; @@ -277,7 +340,26 @@ int SffInfoCommand::execute(){ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (sfftxtFilename != "") { parseSffTxt(); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } + } + + itTypes = outputTypes.find("qfile"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); } + } + + itTypes = outputTypes.find("flow"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); } + } //report output filenames m->mothurOutEndLine(); @@ -303,19 +385,22 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ ofstream outSfftxt, outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; - string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt"; - string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow"; + string rootName = outputDir + m->getRootName(m->getSimpleName(input)); + if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; } + + string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("sfftxt"); + string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("flow"); if (trim) { - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta"; - outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("qfile"); }else{ - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta"; - outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("qfile"); } if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); } if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } - if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); } ifstream in; @@ -323,16 +408,18 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ CommonHeader header; readCommonHeader(in, header); - + int count = 0; + mycount = 0; //check magic number and version if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } //print common header - if (sfftxt) { printCommonHeader(outSfftxt, header); } - + if (sfftxt) { printCommonHeader(outSfftxt, header); } + if (flow) { outFlow << header.numFlowsPerRead << endl; } + //read through the sff file while (!in.eof()) { @@ -345,7 +432,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ //read data seqRead read; readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); - + bool okay = sanityCheck(readheader, read); + if (!okay) { break; } + //if you have provided an accosfile and this seq is not in it, then dont print if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } @@ -358,6 +447,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ } count++; + mycount++; //report progress if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } @@ -404,7 +494,7 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ //read offset char buffer2 [8]; in.read(buffer2, 8); - header.indexOffset = be_int8(*(unsigned long int *)(&buffer2)); + header.indexOffset = be_int8(*(unsigned long long *)(&buffer2)); //read index length char buffer3 [4]; @@ -451,8 +541,8 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ delete[] tempBuffer2; /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; // ~ inverts + unsigned long long spotInFile = in.tellg(); + unsigned long long spot = (spotInFile + 7)& ~7; // ~ inverts in.seekg(spot); }else{ @@ -519,8 +609,8 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){ decodeName(header.timestamp, header.region, header.xy, header.name); /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; + unsigned long long spotInFile = in.tellg(); + unsigned long long spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ @@ -547,7 +637,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i in.read(buffer, 2); read.flowgram[i] = be_int2(*(unsigned short *)(&buffer)); } - + //read flowIndex read.flowIndex.resize(numBases); for (int i = 0; i < numBases; i++) { @@ -572,8 +662,8 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i } /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; + unsigned long long spotInFile = in.tellg(); + unsigned long long spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ @@ -591,32 +681,36 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) { try { - string time = name.substr(0, 6); - unsigned int timeNum = m->fromBase36(time); + if (name.length() >= 6) { + string time = name.substr(0, 6); + unsigned int timeNum = m->fromBase36(time); - int q1 = timeNum / 60; - int sec = timeNum - 60 * q1; - int q2 = q1 / 60; - int minute = q1 - 60 * q2; - int q3 = q2 / 24; - int hr = q2 - 24 * q3; - int q4 = q3 / 32; - int day = q3 - 32 * q4; - int q5 = q4 / 13; - int mon = q4 - 13 * q5; - int year = 2000 + q5; + int q1 = timeNum / 60; + int sec = timeNum - 60 * q1; + int q2 = q1 / 60; + int minute = q1 - 60 * q2; + int q3 = q2 / 24; + int hr = q2 - 24 * q3; + int q4 = q3 / 32; + int day = q3 - 32 * q4; + int q5 = q4 / 13; + int mon = q4 - 13 * q5; + int year = 2000 + q5; + + timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec); + } - timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec); + if (name.length() >= 9) { + region = name.substr(7, 2); - region = name.substr(7, 2); + string xyNum = name.substr(9); + unsigned int myXy = m->fromBase36(xyNum); + int x = myXy >> 12; + int y = myXy & 4095; - string xyNum = name.substr(9); - unsigned int myXy = m->fromBase36(xyNum); - int x = myXy >> 12; - int y = myXy & 4095; + xy = toString(x) + "_" + toString(y); + } - xy = toString(x) + "_" + toString(y); - return 0; } catch(exception& e) { @@ -675,11 +769,39 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) { exit(1); } } - +//********************************************************************************************************************** +bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) { + try { + bool okay = true; + string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n"; + + if (header.clipQualLeft > read.bases.length()) { + okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n"; + } + if (header.clipQualRight > read.bases.length()) { + okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n"; + } + if (header.clipQualLeft > read.qualScores.size()) { + okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n"; + } + if (header.clipQualRight > read.qualScores.size()) { + okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n"; + } + + if (okay == false) { + m->mothurOut(message); m->mothurOutEndLine(); + } + + return okay; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "sanityCheck"); + exit(1); + } +} //********************************************************************************************************************** int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) { try { - out << "Flowgram: "; for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } @@ -709,10 +831,9 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea //********************************************************************************************************************** int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) { try { - string seq = read.bases; - if (trim) { + if (trim) { if(header.clipQualRight < header.clipQualLeft){ seq = "NNNN"; } @@ -750,6 +871,7 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade if (trim) { if(header.clipQualRight < header.clipQualLeft){ + out << ">" << header.name << " xy=" << header.xy << endl; out << "0\t0\t0\t0"; } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ @@ -779,8 +901,12 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { try { if(header.clipQualRight > header.clipQualLeft){ - out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << " numflows=" << read.flowgram.size() << endl; - for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << ' '; } + + int rightIndex = 0; + for (int i = 0; i < header.clipQualRight; i++) { rightIndex += read.flowIndex[i]; } + + out << header.name << ' ' << rightIndex; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); } out << endl; } @@ -818,4 +944,240 @@ int SffInfoCommand::readAccnosFile(string filename) { exit(1); } } -//**********************************************************************************************************************/ +//********************************************************************************************************************** +int SffInfoCommand::parseSffTxt() { + try { + + ifstream inSFF; + m->openInputFile(sfftxtFilename, inSFF); + + if (outputDir == "") { outputDir += m->hasPath(sfftxtFilename); } + + //output file names + ofstream outFasta, outQual, outFlow; + string outFastaFileName, outQualFileName; + string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename)); + if (fileRoot.length() > 0) { + //rip off last . + fileRoot = fileRoot.substr(0, fileRoot.length()-1); + fileRoot = m->getRootName(fileRoot); + } + + string outFlowFileName = outputDir + fileRoot + getOutputFileNameTag("flow"); + if (trim) { + outFastaFileName = outputDir + fileRoot + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + fileRoot + getOutputFileNameTag("qfile"); + }else{ + outFastaFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("qfile"); + } + + if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } + if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); } + + //read common header + string commonHeader = m->getline(inSFF); + string magicNumber = m->getline(inSFF); + string version = m->getline(inSFF); + string indexOffset = m->getline(inSFF); + string indexLength = m->getline(inSFF); + int numReads = parseHeaderLineToInt(inSFF); + string headerLength = m->getline(inSFF); + string keyLength = m->getline(inSFF); + int numFlows = parseHeaderLineToInt(inSFF); + string flowgramCode = m->getline(inSFF); + string flowChars = m->getline(inSFF); + string keySequence = m->getline(inSFF); + m->gobble(inSFF); + + string seqName; + + if (flow) { outFlow << numFlows << endl; } + + for(int i=0;imothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; } + + Header header; + + //parse read header + inSFF >> seqName; + seqName = seqName.substr(1); + m->gobble(inSFF); + header.name = seqName; + + string runPrefix = parseHeaderLineToString(inSFF); header.timestamp = runPrefix; + string regionNumber = parseHeaderLineToString(inSFF); header.region = regionNumber; + string xyLocation = parseHeaderLineToString(inSFF); header.xy = xyLocation; + m->gobble(inSFF); + + string runName = parseHeaderLineToString(inSFF); + string analysisName = parseHeaderLineToString(inSFF); + string fullPath = parseHeaderLineToString(inSFF); + m->gobble(inSFF); + + string readHeaderLen = parseHeaderLineToString(inSFF); convert(readHeaderLen, header.headerLength); + string nameLength = parseHeaderLineToString(inSFF); convert(nameLength, header.nameLength); + int numBases = parseHeaderLineToInt(inSFF); header.numBases = numBases; + string clipQualLeft = parseHeaderLineToString(inSFF); convert(clipQualLeft, header.clipQualLeft); + int clipQualRight = parseHeaderLineToInt(inSFF); header.clipQualRight = clipQualRight; + string clipAdapLeft = parseHeaderLineToString(inSFF); convert(clipAdapLeft, header.clipAdapterLeft); + string clipAdapRight = parseHeaderLineToString(inSFF); convert(clipAdapRight, header.clipAdapterRight); + m->gobble(inSFF); + + seqRead read; + + //parse read + vector flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.flowgram = flowVector; + vector flowIndices = parseHeaderLineToIntVector(inSFF, numBases); + + //adjust for print + vector flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]); + for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); } + read.flowIndex = flowIndicesAdjusted; + + string bases = parseHeaderLineToString(inSFF); read.bases = bases; + vector qualityScores = parseHeaderLineToIntVector(inSFF, numBases); read.qualScores = qualityScores; + m->gobble(inSFF); + + //if you have provided an accosfile and this seq is not in it, then dont print + bool print = true; + if (seqNames.size() != 0) { if (seqNames.count(header.name) == 0) { print = false; } } + + //print + if (print) { + if (fasta) { printFastaSeqData(outFasta, read, header); } + if (qual) { printQualSeqData(outQual, read, header); } + if (flow) { printFlowSeqData(outFlow, read, header); } + } + + //report progress + if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } + + if (m->control_pressed) { break; } + } + + //report progress + if (!m->control_pressed) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)); m->mothurOutEndLine(); } } + + inSFF.close(); + + if (fasta) { outFasta.close(); } + if (qual) { outQual.close(); } + if (flow) { outFlow.close(); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseSffTxt"); + exit(1); + } +} +//********************************************************************************************************************** + +int SffInfoCommand::parseHeaderLineToInt(ifstream& file){ + try { + int number; + + while (!file.eof()) { + + char c = file.get(); + if (c == ':'){ + file >> number; + break; + } + + } + m->gobble(file); + return number; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt"); + exit(1); + } + +} + +//********************************************************************************************************************** + +string SffInfoCommand::parseHeaderLineToString(ifstream& file){ + try { + string text; + + while (!file.eof()) { + char c = file.get(); + + if (c == ':'){ + //m->gobble(file); + //text = m->getline(file); + file >> text; + break; + } + } + m->gobble(file); + + return text; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString"); + exit(1); + } +} + +//********************************************************************************************************************** + +vector SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){ + try { + vector floatVector(length); + + while (!file.eof()) { + char c = file.get(); + if (c == ':'){ + float temp; + for(int i=0;i> temp; + floatVector[i] = temp * 100; + } + break; + } + } + m->gobble(file); + return floatVector; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector"); + exit(1); + } +} + +//********************************************************************************************************************** + +vector SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){ + try { + vector intVector(length); + + while (!file.eof()) { + char c = file.get(); + if (c == ':'){ + for(int i=0;i> intVector[i]; + } + break; + } + } + m->gobble(file); + return intVector; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector"); + exit(1); + } +} + +//********************************************************************************************************************** + + + +