X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=66409e88f9013abd8deb9fd2ba78f6bfdf249a38;hb=3247d888e7aafc4a65ec9062a94dfd166c2c5b1d;hp=7511a2e613bb08912044ad3c044cf3a6a8f91c40;hpb=cd4c86f68cb53017f0f98a82dd2c2e56a64c67bd;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 7511a2e..66409e8 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -10,6 +10,57 @@ #include "sffinfocommand.h" #include "endiannessmacros.h" +//********************************************************************************************************************** +vector SffInfoCommand::getValidParameters(){ + try { + string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +SffInfoCommand::SffInfoCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["flow"] = tempOutNames; + outputTypes["sfftxt"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector SffInfoCommand::getRequiredParameters(){ + try { + string Array[] = {"sff"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector SffInfoCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getRequiredFiles"); + exit(1); + } +} //********************************************************************************************************************** SffInfoCommand::SffInfoCommand(string option) { @@ -34,6 +85,13 @@ SffInfoCommand::SffInfoCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["flow"] = tempOutNames; + outputTypes["sfftxt"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -43,28 +101,43 @@ SffInfoCommand::SffInfoCommand(string option) { sffFilename = validParameter.validFile(parameters, "sff", false); if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } else { - splitAtDash(sffFilename, filenames); + m->splitAtDash(sffFilename, filenames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < filenames.size(); i++) { if (inputDir != "") { - string path = hasPath(filenames[i]); + string path = m->hasPath(filenames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { filenames[i] = inputDir + filenames[i]; } } ifstream in; - int ableToOpen = openInputFile(filenames[i], in, "noerror"); + int ableToOpen = m->openInputFile(filenames[i], in, "noerror"); //if you can't open it, try default location if (ableToOpen == 1) { if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + getSimpleName(filenames[i]); + string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]); m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = openInputFile(tryPath, in, "noerror"); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + filenames[i] = tryPath; + } + } + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); filenames[i] = tryPath; } } + in.close(); if (ableToOpen == 1) { @@ -83,25 +156,38 @@ SffInfoCommand::SffInfoCommand(string option) { if (accnosName == "not found") { accnosName = ""; } else { hasAccnos = true; - splitAtDash(accnosName, accnosFileNames); + m->splitAtDash(accnosName, accnosFileNames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < accnosFileNames.size(); i++) { if (inputDir != "") { - string path = hasPath(accnosFileNames[i]); + string path = m->hasPath(accnosFileNames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } } ifstream in; - int ableToOpen = openInputFile(accnosFileNames[i], in, "noerror"); + int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror"); //if you can't open it, try default location if (ableToOpen == 1) { if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + getSimpleName(accnosFileNames[i]); + string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]); m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = openInputFile(tryPath, in, "noerror"); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + accnosFileNames[i] = tryPath; + } + } + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); accnosFileNames[i] = tryPath; } } @@ -124,19 +210,19 @@ SffInfoCommand::SffInfoCommand(string option) { } string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; } - qual = isTrue(temp); + qual = m->isTrue(temp); temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } - fasta = isTrue(temp); + fasta = m->isTrue(temp); temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } - flow = isTrue(temp); + flow = m->isTrue(temp); temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } - trim = isTrue(temp); + trim = m->isTrue(temp); temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } - sfftxt = isTrue(temp); + sfftxt = m->isTrue(temp); } } catch(exception& e) { @@ -210,27 +296,27 @@ int SffInfoCommand::execute(){ int SffInfoCommand::extractSffInfo(string input, string accnos){ try { - if (outputDir == "") { outputDir += hasPath(input); } + if (outputDir == "") { outputDir += m->hasPath(input); } if (accnos != "") { readAccnosFile(accnos); } else { seqNames.clear(); } ofstream outSfftxt, outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; - string sfftxtFileName = outputDir + getRootName(getSimpleName(input)) + "sff.txt"; - string outFlowFileName = outputDir + getRootName(getSimpleName(input)) + "flow"; + string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt"; + string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow"; if (trim) { - outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "fasta"; - outQualFileName = outputDir + getRootName(getSimpleName(input)) + "qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual"; }else{ - outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "raw.fasta"; - outQualFileName = outputDir + getRootName(getSimpleName(input)) + "raw.qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual"; } - if (sfftxt) { openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); } - if (fasta) { openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); } - if (qual) { openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); } - if (flow) { openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); } + if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); } + if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); } + if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outputTypes["flow"].push_back(outFlowFileName); } ifstream in; in.open(input.c_str(), ios::binary); @@ -405,12 +491,12 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){ char buffer4 [2]; in.read(buffer4, 2); header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4)); + header.clipQualLeft = 5; //read clip qual right char buffer5 [2]; in.read(buffer5, 2); header.clipQualRight = be_int2(*(unsigned short *)(&buffer5)); - if(header.clipQualRight == 0){ header.clipQualRight = header.numBases; } //read clipAdapterLeft char buffer6 [2]; @@ -429,6 +515,9 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){ if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); } delete[] tempBuffer; + //extract info from name + decodeName(header.timestamp, header.region, header.xy, header.name); + /* Pad to 8 chars */ unsigned long int spotInFile = in.tellg(); unsigned long int spot = (spotInFile + 7)& ~7; @@ -499,6 +588,43 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i } } //********************************************************************************************************************** +int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) { + try { + + string time = name.substr(0, 6); + unsigned int timeNum = m->fromBase36(time); + + int q1 = timeNum / 60; + int sec = timeNum - 60 * q1; + int q2 = q1 / 60; + int minute = q1 - 60 * q2; + int q3 = q2 / 24; + int hr = q2 - 24 * q3; + int q4 = q3 / 32; + int day = q3 - 32 * q4; + int q5 = q4 / 13; + int mon = q4 - 13 * q5; + int year = 2000 + q5; + + timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec); + + region = name.substr(7, 2); + + string xyNum = name.substr(9); + unsigned int myXy = m->fromBase36(xyNum); + int x = myXy >> 12; + int y = myXy & 4095; + + xy = toString(x) + "_" + toString(y); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "decodeName"); + exit(1); + } +} +//********************************************************************************************************************** int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) { try { @@ -526,9 +652,9 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) { try { out << ">" << header.name << endl; - out << "Run Prefix: " << endl; - out << "Region #: " << endl; - out << "XY Location: " << endl << endl; + out << "Run Prefix: " << header.timestamp << endl; + out << "Region #: " << header.region << endl; + out << "XY Location: " << header.xy << endl << endl; out << "Run Name: " << endl; out << "Analysis Name: " << endl; @@ -562,6 +688,7 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; } //make the bases you want to clip lowercase and the bases you want to keep upper case + if(header.clipQualRight == 0){ header.clipQualRight = read.bases.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); } for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { read.bases[i] = toupper(read.bases[i]); } for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } @@ -586,18 +713,27 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head string seq = read.bases; if (trim) { - seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft+1)); + if(header.clipQualRight < header.clipQualLeft){ + seq = "NNNN"; + } + else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ + seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); + } + else { + seq = seq.substr(header.clipQualLeft-1); + } }else{ //if you wanted the sfftxt then you already converted the bases to the right case if (!sfftxt) { //make the bases you want to clip lowercase and the bases you want to keep upper case + if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } } } - out << ">" << header.name << endl; + out << ">" << header.name << " xy=" << header.xy << endl; out << seq << endl; return 0; @@ -613,10 +749,19 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade try { if (trim) { - out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft+1) << endl; - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { out << read.qualScores[i] << '\t'; } + if(header.clipQualRight < header.clipQualLeft){ + out << "0\t0\t0\t0"; + } + else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ + out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { out << read.qualScores[i] << '\t'; } + } + else{ + out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + } }else{ - out << ">" << header.name << " length=" << read.qualScores.size() << endl; + out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl; for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } } @@ -634,7 +779,7 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { try { - out << ">" << header.name << endl; + out << ">" << header.name << " xy=" << header.xy << endl; for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } out << endl; @@ -652,11 +797,11 @@ int SffInfoCommand::readAccnosFile(string filename) { seqNames.clear(); ifstream in; - openInputFile(filename, in); + m->openInputFile(filename, in); string name; while(!in.eof()){ - in >> name; gobble(in); + in >> name; m->gobble(in); seqNames.insert(name);