X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=a691a405ddb31fb022ebf37c98b40deb8e54c6a5;hb=bd93b1a6f9fe9a6a4a7ac2e9f106e5c83a438856;hp=885aadc9c05eb34ba16d0dca6e48322dc452e5af;hpb=dfae916a398508554d35c6b3c8002b69becb53be;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 885aadc..a691a40 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -43,22 +43,32 @@ SffInfoCommand::SffInfoCommand(string option) { sffFilename = validParameter.validFile(parameters, "sff", false); if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } else { - splitAtDash(sffFilename, filenames); + m->splitAtDash(sffFilename, filenames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < filenames.size(); i++) { if (inputDir != "") { - string path = hasPath(filenames[i]); + string path = m->hasPath(filenames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { filenames[i] = inputDir + filenames[i]; } } ifstream in; - int ableToOpen = openInputFile(filenames[i], in); + int ableToOpen = m->openInputFile(filenames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = m->openInputFile(tryPath, in, "noerror"); + filenames[i] = tryPath; + } + } in.close(); if (ableToOpen == 1) { - m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); + m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine(); //erase from file list filenames.erase(filenames.begin()+i); i--; @@ -73,22 +83,32 @@ SffInfoCommand::SffInfoCommand(string option) { if (accnosName == "not found") { accnosName = ""; } else { hasAccnos = true; - splitAtDash(accnosName, accnosFileNames); + m->splitAtDash(accnosName, accnosFileNames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < accnosFileNames.size(); i++) { if (inputDir != "") { - string path = hasPath(accnosFileNames[i]); + string path = m->hasPath(accnosFileNames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } } ifstream in; - int ableToOpen = openInputFile(accnosFileNames[i], in); + int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = m->openInputFile(tryPath, in, "noerror"); + accnosFileNames[i] = tryPath; + } + } in.close(); if (ableToOpen == 1) { - m->mothurOut(accnosFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); //erase from file list accnosFileNames.erase(accnosFileNames.begin()+i); i--; @@ -104,19 +124,19 @@ SffInfoCommand::SffInfoCommand(string option) { } string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; } - qual = isTrue(temp); + qual = m->isTrue(temp); temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } - fasta = isTrue(temp); + fasta = m->isTrue(temp); temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } - flow = isTrue(temp); + flow = m->isTrue(temp); temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } - trim = isTrue(temp); + trim = m->isTrue(temp); temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } - sfftxt = isTrue(temp); + sfftxt = m->isTrue(temp); } } catch(exception& e) { @@ -128,9 +148,16 @@ SffInfoCommand::SffInfoCommand(string option) { void SffInfoCommand::help(){ try { - m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n"); - - m->mothurOut("Example sffinfo(sff=...).\n"); + m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n"); + m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"); + m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"); + m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"); + m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"); + m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"); + m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"); + m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"); + m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"); + m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n"); } catch(exception& e) { @@ -183,27 +210,27 @@ int SffInfoCommand::execute(){ int SffInfoCommand::extractSffInfo(string input, string accnos){ try { - if (outputDir == "") { outputDir += hasPath(input); } + if (outputDir == "") { outputDir += m->hasPath(input); } if (accnos != "") { readAccnosFile(accnos); } else { seqNames.clear(); } ofstream outSfftxt, outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; - string sfftxtFileName = outputDir + getRootName(getSimpleName(input)) + "sff.txt"; - string outFlowFileName = outputDir + getRootName(getSimpleName(input)) + "flow"; + string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt"; + string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow"; if (trim) { - outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "fasta"; - outQualFileName = outputDir + getRootName(getSimpleName(input)) + "qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual"; }else{ - outFastaFileName = outputDir + getRootName(getSimpleName(input)) + "raw.fasta"; - outQualFileName = outputDir + getRootName(getSimpleName(input)) + "raw.qual"; + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual"; } - if (sfftxt) { openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); } - if (fasta) { openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); } - if (qual) { openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); } - if (flow) { openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); } + if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); } + if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); } + if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); } ifstream in; in.open(input.c_str(), ios::binary); @@ -219,7 +246,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ //print common header if (sfftxt) { printCommonHeader(outSfftxt, header); } - + //read through the sff file while (!in.eof()) { @@ -232,22 +259,22 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ //read data seqRead read; readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); - + //if you have provided an accosfile and this seq is not in it, then dont print if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } //print if (print) { - if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read); } + if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); } if (fasta) { printFastaSeqData(outFasta, read, readheader); } if (qual) { printQualSeqData(outQual, read, readheader); } if (flow) { printFlowSeqData(outFlow, read, readheader); } } count++; - + //report progress - if((count+1) % 500 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } + if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } if (m->control_pressed) { count = 0; break; } @@ -255,7 +282,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ } //report progress - if (!m->control_pressed) { if((count) % 500 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } + if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } in.close(); @@ -276,12 +303,12 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ try { if (!in.eof()) { - + //read magic number - char buffer[sizeof(header.magicNumber)]; - in.read(buffer, sizeof(header.magicNumber)); + char buffer[4]; + in.read(buffer, 4); header.magicNumber = be_int4(*(unsigned int *)(&buffer)); - + //read version char buffer9[4]; in.read(buffer9, 4); @@ -289,33 +316,33 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); } //read offset - char buffer2 [sizeof(header.indexOffset)]; - in.read(buffer2, sizeof(header.indexOffset)); + char buffer2 [8]; + in.read(buffer2, 8); header.indexOffset = be_int8(*(unsigned long int *)(&buffer2)); //read index length - char buffer3 [sizeof(header.indexLength)]; - in.read(buffer3, sizeof(header.indexLength)); + char buffer3 [4]; + in.read(buffer3, 4); header.indexLength = be_int4(*(unsigned int *)(&buffer3)); //read num reads - char buffer4 [sizeof(header.numReads)]; - in.read(buffer4, sizeof(header.numReads)); + char buffer4 [4]; + in.read(buffer4, 4); header.numReads = be_int4(*(unsigned int *)(&buffer4)); //read header length - char buffer5 [sizeof(header.headerLength)]; - in.read(buffer5, sizeof(header.headerLength)); + char buffer5 [2]; + in.read(buffer5, 2); header.headerLength = be_int2(*(unsigned short *)(&buffer5)); //read key length - char buffer6 [sizeof(header.keyLength)]; - in.read(buffer6, sizeof(header.keyLength)); + char buffer6 [2]; + in.read(buffer6, 2); header.keyLength = be_int2(*(unsigned short *)(&buffer6)); //read number of flow reads - char buffer7 [sizeof(header.numFlowsPerRead)]; - in.read(buffer7, sizeof(header.numFlowsPerRead)); + char buffer7 [2]; + in.read(buffer7, 2); header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7)); //read format code @@ -324,20 +351,22 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ header.flogramFormatCode = (int)(buffer8[0]); //read flow chars - char tempBuffer [header.numFlowsPerRead]; - in.read(tempBuffer, header.numFlowsPerRead); + char* tempBuffer = new char[header.numFlowsPerRead]; + in.read(&(*tempBuffer), header.numFlowsPerRead); header.flowChars = tempBuffer; if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead); } + delete[] tempBuffer; //read key - char tempBuffer2 [header.keyLength]; - in.read(tempBuffer2, header.keyLength); + char* tempBuffer2 = new char[header.keyLength]; + in.read(&(*tempBuffer2), header.keyLength); header.keySequence = tempBuffer2; if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength); } + delete[] tempBuffer2; /* Pad to 8 chars */ - int spotInFile = in.tellg(); - int spot = (spotInFile + 7)& ~7; // ~ inverts + unsigned long int spotInFile = in.tellg(); + unsigned long int spot = (spotInFile + 7)& ~7; // ~ inverts in.seekg(spot); }else{ @@ -358,49 +387,51 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){ if (!in.eof()) { //read header length - char buffer [sizeof(header.headerLength)]; - in.read(buffer, sizeof(header.headerLength)); + char buffer [2]; + in.read(buffer, 2); header.headerLength = be_int2(*(unsigned short *)(&buffer)); //read name length - char buffer2 [sizeof(header.nameLength)]; - in.read(buffer2, sizeof(header.nameLength)); + char buffer2 [2]; + in.read(buffer2, 2); header.nameLength = be_int2(*(unsigned short *)(&buffer2)); //read num bases - char buffer3 [sizeof(header.numBases)]; - in.read(buffer3, sizeof(header.numBases)); + char buffer3 [4]; + in.read(buffer3, 4); header.numBases = be_int4(*(unsigned int *)(&buffer3)); //read clip qual left - char buffer4 [sizeof(header.clipQualLeft)]; - in.read(buffer4, sizeof(header.clipQualLeft)); + char buffer4 [2]; + in.read(buffer4, 2); header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4)); + header.clipQualLeft = 5; //read clip qual right - char buffer5 [sizeof(header.clipQualRight)]; - in.read(buffer5, sizeof(header.clipQualRight)); + char buffer5 [2]; + in.read(buffer5, 2); header.clipQualRight = be_int2(*(unsigned short *)(&buffer5)); //read clipAdapterLeft - char buffer6 [sizeof(header.clipAdapterLeft)]; - in.read(buffer6, sizeof(header.clipAdapterLeft)); + char buffer6 [2]; + in.read(buffer6, 2); header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6)); //read clipAdapterRight - char buffer7 [sizeof(header.clipAdapterRight)]; - in.read(buffer7, sizeof(header.clipAdapterRight)); + char buffer7 [2]; + in.read(buffer7, 2); header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7)); //read name - char tempBuffer [header.nameLength]; - in.read(tempBuffer, header.nameLength); + char* tempBuffer = new char[header.nameLength]; + in.read(&(*tempBuffer), header.nameLength); header.name = tempBuffer; if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); } + delete[] tempBuffer; /* Pad to 8 chars */ - int spotInFile = in.tellg(); - int spot = (spotInFile + 7)& ~7; + unsigned long int spotInFile = in.tellg(); + unsigned long int spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ @@ -423,8 +454,8 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i //read flowgram read.flowgram.resize(numFlowReads); for (int i = 0; i < numFlowReads; i++) { - char buffer [sizeof(unsigned short)]; - in.read(buffer, (sizeof(unsigned short))); + char buffer [2]; + in.read(buffer, 2); read.flowgram[i] = be_int2(*(unsigned short *)(&buffer)); } @@ -435,24 +466,25 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i in.read(temp, 1); read.flowIndex[i] = be_int1(*(unsigned char *)(&temp)); } - + //read bases - char tempBuffer[numBases]; - in.read(tempBuffer, numBases); + char* tempBuffer = new char[numBases]; + in.read(&(*tempBuffer), numBases); read.bases = tempBuffer; if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases); } + delete[] tempBuffer; - //read flowgram + //read qual scores read.qualScores.resize(numBases); for (int i = 0; i < numBases; i++) { char temp[1]; in.read(temp, 1); read.qualScores[i] = be_int1(*(unsigned char *)(&temp)); } - + /* Pad to 8 chars */ - int spotInFile = in.tellg(); - int spot = (spotInFile + 7)& ~7; + unsigned long int spotInFile = in.tellg(); + unsigned long int spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ @@ -519,18 +551,26 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) { } //********************************************************************************************************************** -int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read) { +int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) { try { - out << "FlowGram: "; + out << "Flowgram: "; for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } out << endl << "Flow Indexes: "; int sum = 0; for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; } + //make the bases you want to clip lowercase and the bases you want to keep upper case + if(header.clipQualRight == 0){ header.clipQualRight = read.bases.length(); } + for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); } + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { read.bases[i] = toupper(read.bases[i]); } + for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } + out << endl << "Bases: " << read.bases << endl << "Quality Scores: "; for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + + out << endl << endl; return 0; @@ -546,9 +586,25 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head string seq = read.bases; - if (trim) { - seq = seq.substr(header.clipQualLeft, (header.clipQualRight-header.clipQualLeft)); + if(header.clipQualRight < header.clipQualLeft){ + seq = "NNNN"; + } + else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ + seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); + } + else { + seq = seq.substr(header.clipQualLeft-1); + } + }else{ + //if you wanted the sfftxt then you already converted the bases to the right case + if (!sfftxt) { + //make the bases you want to clip lowercase and the bases you want to keep upper case + if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } + for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } + for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } + } } out << ">" << header.name << endl; @@ -567,8 +623,17 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade try { if (trim) { - out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; - for (int i = header.clipQualLeft; i < (header.clipQualRight-header.clipQualLeft); i++) { out << read.qualScores[i] << '\t'; } + if(header.clipQualRight < header.clipQualLeft){ + out << "0\t0\t0\t0"; + } + else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ + out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { out << read.qualScores[i] << '\t'; } + } + else{ + out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + } }else{ out << ">" << header.name << " length=" << read.qualScores.size() << endl; for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } @@ -606,11 +671,11 @@ int SffInfoCommand::readAccnosFile(string filename) { seqNames.clear(); ifstream in; - openInputFile(filename, in); + m->openInputFile(filename, in); string name; while(!in.eof()){ - in >> name; gobble(in); + in >> name; m->gobble(in); seqNames.insert(name);