X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=08cf21e5d6b543684cfebe56c0cdaf8697139125;hb=2ecee16fec29d4c525f740ec19b27962ca09c050;hp=8c50247c6845867ff9f99f62760d81300d433779;hpb=86c838c428a9e7d26f902f5492738241fa72c4e7;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 8c50247..08cf21e 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -16,7 +16,7 @@ vector SffInfoCommand::setParameters(){ CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos); CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt); - CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow); + CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow); CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim); CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta); CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile); @@ -41,7 +41,7 @@ string SffInfoCommand::getHelpString(){ helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"; helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"; helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"; - helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"; + helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"; helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"; helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"; helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"; @@ -55,6 +55,29 @@ string SffInfoCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string SffInfoCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "fasta"; } + else if (type == "flow") { outputFileName = "flow"; } + else if (type == "sfftxt") { outputFileName = "sff.txt"; } + else if (type == "qfile") { outputFileName = "qual"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** @@ -82,6 +105,7 @@ SffInfoCommand::SffInfoCommand(string option) { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { //valid paramters for this command @@ -169,7 +193,7 @@ SffInfoCommand::SffInfoCommand(string option) { //erase from file list filenames.erase(filenames.begin()+i); i--; - } + }else { m->setSFFFile(filenames[i]); } } } @@ -255,7 +279,7 @@ SffInfoCommand::SffInfoCommand(string option) { temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } fasta = m->isTrue(temp); - temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } + temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "T"; } flow = m->isTrue(temp); temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } @@ -282,7 +306,7 @@ SffInfoCommand::SffInfoCommand(string option) { } if ((sfftxtFilename == "") && (filenames.size() == 0)) { - //if there is a current fasta file, use it + //if there is a current sff file, use it string filename = m->getSFFFile(); if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); } else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; } @@ -297,15 +321,15 @@ SffInfoCommand::SffInfoCommand(string option) { //********************************************************************************************************************** int SffInfoCommand::execute(){ try { - if (abort == true) { if (calledHelp) { return 0; } return 2; } for (int s = 0; s < filenames.size(); s++) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } int start = time(NULL); + filenames[s] = m->getFullPathName(filenames[s]); m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine(); string accnos = ""; @@ -318,7 +342,7 @@ int SffInfoCommand::execute(){ if (sfftxtFilename != "") { parseSffTxt(); } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string current = ""; @@ -361,14 +385,17 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ ofstream outSfftxt, outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; - string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt"; - string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow"; + string rootName = outputDir + m->getRootName(m->getSimpleName(input)); + if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; } + + string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("sfftxt"); + string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("flow"); if (trim) { - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta"; - outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("qfile"); }else{ - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta"; - outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("qfile"); } if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); } @@ -405,7 +432,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ //read data seqRead read; readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); - + bool okay = sanityCheck(readheader, read); + if (!okay) { break; } + //if you have provided an accosfile and this seq is not in it, then dont print if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } @@ -465,7 +494,7 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ //read offset char buffer2 [8]; in.read(buffer2, 8); - header.indexOffset = be_int8(*(unsigned long int *)(&buffer2)); + header.indexOffset = be_int8(*(unsigned long long *)(&buffer2)); //read index length char buffer3 [4]; @@ -512,8 +541,8 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ delete[] tempBuffer2; /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; // ~ inverts + unsigned long long spotInFile = in.tellg(); + unsigned long long spot = (spotInFile + 7)& ~7; // ~ inverts in.seekg(spot); }else{ @@ -580,8 +609,8 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){ decodeName(header.timestamp, header.region, header.xy, header.name); /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; + unsigned long long spotInFile = in.tellg(); + unsigned long long spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ @@ -608,7 +637,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i in.read(buffer, 2); read.flowgram[i] = be_int2(*(unsigned short *)(&buffer)); } - + //read flowIndex read.flowIndex.resize(numBases); for (int i = 0; i < numBases; i++) { @@ -633,8 +662,8 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i } /* Pad to 8 chars */ - unsigned long int spotInFile = in.tellg(); - unsigned long int spot = (spotInFile + 7)& ~7; + unsigned long long spotInFile = in.tellg(); + unsigned long long spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ @@ -740,11 +769,39 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) { exit(1); } } - +//********************************************************************************************************************** +bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) { + try { + bool okay = true; + string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n"; + + if (header.clipQualLeft > read.bases.length()) { + okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n"; + } + if (header.clipQualRight > read.bases.length()) { + okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n"; + } + if (header.clipQualLeft > read.qualScores.size()) { + okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n"; + } + if (header.clipQualRight > read.qualScores.size()) { + okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n"; + } + + if (okay == false) { + m->mothurOut(message); m->mothurOutEndLine(); + } + + return okay; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "sanityCheck"); + exit(1); + } +} //********************************************************************************************************************** int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) { try { - out << "Flowgram: "; for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } @@ -774,10 +831,9 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea //********************************************************************************************************************** int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) { try { - string seq = read.bases; - if (trim) { + if (trim) { if(header.clipQualRight < header.clipQualLeft){ seq = "NNNN"; } @@ -900,17 +956,24 @@ int SffInfoCommand::parseSffTxt() { //output file names ofstream outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; - string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow"; + string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename)); + if (fileRoot.length() > 0) { + //rip off last . + fileRoot = fileRoot.substr(0, fileRoot.length()-1); + fileRoot = m->getRootName(fileRoot); + } + + string outFlowFileName = outputDir + fileRoot + getOutputFileNameTag("flow"); if (trim) { - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta"; - outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual"; + outFastaFileName = outputDir + fileRoot + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + fileRoot + getOutputFileNameTag("qfile"); }else{ - outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta"; - outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual"; + outFastaFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("fasta"); + outQualFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("qfile"); } if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } - if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); } //read common header