X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=11754469fc3543c2ac544304606c507755f7ac41;hb=df41d90a9611eab75d0516e2654ed8580f8df24c;hp=2d6af87eea323f62583a880e2b6778a34120fa00;hpb=956cdff34f2d609a7736838b1631cd7957580b8b;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 2d6af87..1175446 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -10,18 +10,69 @@ #include "sffinfocommand.h" #include "endiannessmacros.h" +//********************************************************************************************************************** +vector SffInfoCommand::getValidParameters(){ + try { + string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +SffInfoCommand::SffInfoCommand(){ + try { + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["flow"] = tempOutNames; + outputTypes["sfftxt"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector SffInfoCommand::getRequiredParameters(){ + try { + string Array[] = {"sff"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector SffInfoCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "getRequiredFiles"); + exit(1); + } +} //********************************************************************************************************************** SffInfoCommand::SffInfoCommand(string option) { try { abort = false; + hasAccnos = false; //allow user to run help if(option == "help") { help(); abort = true; } else { //valid paramters for this command - string Array[] = {"sff","outputdir","inputdir", "outputdir"}; + string Array[] = {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -33,6 +84,13 @@ SffInfoCommand::SffInfoCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["flow"] = tempOutNames; + outputTypes["sfftxt"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -42,22 +100,47 @@ SffInfoCommand::SffInfoCommand(string option) { sffFilename = validParameter.validFile(parameters, "sff", false); if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } else { - splitAtDash(sffFilename, filenames); + m->splitAtDash(sffFilename, filenames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < filenames.size(); i++) { if (inputDir != "") { - string path = hasPath(filenames[i]); + string path = m->hasPath(filenames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { filenames[i] = inputDir + filenames[i]; } } ifstream in; - int ableToOpen = openInputFile(filenames[i], in); + int ableToOpen = m->openInputFile(filenames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + filenames[i] = tryPath; + } + } + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]); + m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + filenames[i] = tryPath; + } + } + in.close(); if (ableToOpen == 1) { - m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); + m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine(); //erase from file list filenames.erase(filenames.begin()+i); i--; @@ -67,6 +150,78 @@ SffInfoCommand::SffInfoCommand(string option) { //make sure there is at least one valid file left if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } } + + accnosName = validParameter.validFile(parameters, "accnos", false); + if (accnosName == "not found") { accnosName = ""; } + else { + hasAccnos = true; + m->splitAtDash(accnosName, accnosFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < accnosFileNames.size(); i++) { + if (inputDir != "") { + string path = m->hasPath(accnosFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; } + } + + ifstream in; + int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + accnosFileNames[i] = tryPath; + } + } + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]); + m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + accnosFileNames[i] = tryPath; + } + } + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + accnosFileNames.erase(accnosFileNames.begin()+i); + i--; + } + } + + //make sure there is at least one valid file left + if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + if (hasAccnos) { + if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); } + } + + string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; } + qual = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; } + fasta = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; } + flow = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } + trim = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } + sfftxt = m->isTrue(temp); } } catch(exception& e) { @@ -78,9 +233,16 @@ SffInfoCommand::SffInfoCommand(string option) { void SffInfoCommand::help(){ try { - m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n"); - - m->mothurOut("Example sffinfo(sff=...).\n"); + m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n"); + m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"); + m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"); + m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"); + m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"); + m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"); + m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"); + m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"); + m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"); + m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n"); } catch(exception& e) { @@ -102,16 +264,16 @@ int SffInfoCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + int start = time(NULL); + m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine(); - if (outputDir == "") { outputDir += hasPath(filenames[s]); } - string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt"; - - extractSffInfo(filenames[s], outputFileName); + string accnos = ""; + if (hasAccnos) { accnos = accnosFileNames[s]; } - outputNames.push_back(outputFileName); + int numReads = extractSffInfo(filenames[s], accnos); - m->mothurOut("Done."); m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } @@ -130,46 +292,91 @@ int SffInfoCommand::execute(){ } } //********************************************************************************************************************** -int SffInfoCommand::extractSffInfo(string input, string output){ +int SffInfoCommand::extractSffInfo(string input, string accnos){ try { - ofstream out; - openOutputFile(output, out); + + if (outputDir == "") { outputDir += m->hasPath(input); } + + if (accnos != "") { readAccnosFile(accnos); } + else { seqNames.clear(); } + + ofstream outSfftxt, outFasta, outQual, outFlow; + string outFastaFileName, outQualFileName; + string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt"; + string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow"; + if (trim) { + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual"; + }else{ + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual"; + } + + if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); } + if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); } + if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outputTypes["flow"].push_back(outFlowFileName); } ifstream in; in.open(input.c_str(), ios::binary); - CommonHeader* header = readCommonHeader(in); + CommonHeader header; + readCommonHeader(in, header); + + int count = 0; - //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl; -//cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl; - cout << in.tellg() << endl; + //check magic number and version + if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; } + if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } + + //print common header + if (sfftxt) { printCommonHeader(outSfftxt, header); } + //read through the sff file while (!in.eof()) { - - //read header - Header* readheader = readHeader(in); - //read data - seqRead* read = readSeqData(in, header->numFlowsPerRead, readheader->numBases); + bool print = true; - cout << in.tellg() << endl; + //read header + Header readheader; + readHeader(in, readheader); - //print common header - printCommonHeader(out, header, true); + //read data + seqRead read; + readSeqData(in, read, header.numFlowsPerRead, readheader.numBases); + + //if you have provided an accosfile and this seq is not in it, then dont print + if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } } + + //print + if (print) { + if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); } + if (fasta) { printFastaSeqData(outFasta, read, readheader); } + if (qual) { printQualSeqData(outQual, read, readheader); } + if (flow) { printFlowSeqData(outFlow, read, readheader); } + } - //print header - printHeader(out, readheader, true); + count++; + + //report progress + if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } + + if (m->control_pressed) { count = 0; break; } - //print data - printSeqData(out, read, true); - + if (count >= header.numReads) { break; } } + //report progress + if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } in.close(); - out.close(); - return 0; + if (sfftxt) { outSfftxt.close(); } + if (fasta) { outFasta.close(); } + if (qual) { outQual.close(); } + if (flow) { outFlow.close(); } + + return count; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "extractSffInfo"); @@ -177,84 +384,81 @@ int SffInfoCommand::extractSffInfo(string input, string output){ } } //********************************************************************************************************************** -CommonHeader* SffInfoCommand::readCommonHeader(ifstream& in){ +int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ try { - CommonHeader* header = new CommonHeader(); - + if (!in.eof()) { - string tempBuf = ""; - + //read magic number - char* buffer = new char(sizeof(header->magicNumber)); - in.read(buffer, sizeof(header->magicNumber)); - header->magicNumber = be_int4(*(uint32_t *)(buffer)); - delete[] buffer; - + char buffer[4]; + in.read(buffer, 4); + header.magicNumber = be_int4(*(unsigned int *)(&buffer)); + //read version - header->version = new char(4); - in.read(header->version, 4); - tempBuf = buffer; - if (tempBuf.length() > 4) { tempBuf = tempBuf.substr(0, 4); strcpy(header->version, tempBuf.c_str()); } - + char buffer9[4]; + in.read(buffer9, 4); + header.version = ""; + for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); } + //read offset - buffer = new char(sizeof(header->indexOffset)); - in.read(buffer, sizeof(header->indexOffset)); - header->indexOffset = be_int8(*(uint64_t *)(buffer)); - delete[] buffer; + char buffer2 [8]; + in.read(buffer2, 8); + header.indexOffset = be_int8(*(unsigned long int *)(&buffer2)); //read index length - buffer = new char(sizeof(header->indexLength)); - in.read(buffer, sizeof(header->indexLength)); - header->indexLength = be_int4(*(uint32_t *)(buffer)); - delete[] buffer; + char buffer3 [4]; + in.read(buffer3, 4); + header.indexLength = be_int4(*(unsigned int *)(&buffer3)); //read num reads - buffer = new char(sizeof(header->numReads)); - in.read(buffer, sizeof(header->numReads)); - header->numReads = be_int4(*(uint32_t *)(buffer)); - delete[] buffer; - + char buffer4 [4]; + in.read(buffer4, 4); + header.numReads = be_int4(*(unsigned int *)(&buffer4)); + //read header length - buffer = new char(sizeof(header->headerLength)); - in.read(buffer, sizeof(header->headerLength)); - header->headerLength = be_int2(*(uint16_t *)(buffer)); - delete[] buffer; - + char buffer5 [2]; + in.read(buffer5, 2); + header.headerLength = be_int2(*(unsigned short *)(&buffer5)); + //read key length - buffer = new char(sizeof(header->keyLength)); - in.read(buffer, sizeof(header->keyLength)); - header->keyLength = be_int2(*(uint16_t *)(buffer)); - delete[] buffer; - - //read number of flow reads - buffer = new char(sizeof(header->numFlowsPerRead)); - in.read(buffer, sizeof(header->numFlowsPerRead)); - header->numFlowsPerRead = be_int2(*(uint16_t *)(buffer)); - delete[] buffer; + char buffer6 [2]; + in.read(buffer6, 2); + header.keyLength = be_int2(*(unsigned short *)(&buffer6)); + //read number of flow reads + char buffer7 [2]; + in.read(buffer7, 2); + header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7)); + //read format code - buffer = new char(sizeof(header->flogramFormatCode)); - in.read(buffer, sizeof(header->flogramFormatCode)); - header->flogramFormatCode = be_int1(*(uint8_t *)(buffer)); - delete[] buffer; + char buffer8 [1]; + in.read(buffer8, 1); + header.flogramFormatCode = (int)(buffer8[0]); //read flow chars - header->flowChars = new char(header->numFlowsPerRead); - in.read(header->flowChars, header->numFlowsPerRead); - tempBuf = buffer; - if (tempBuf.length() > header->numFlowsPerRead) { tempBuf = tempBuf.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf.c_str()); } + char* tempBuffer = new char[header.numFlowsPerRead]; + in.read(&(*tempBuffer), header.numFlowsPerRead); + header.flowChars = tempBuffer; + if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead); } + delete[] tempBuffer; //read key - header->keySequence = new char(header->keyLength); - in.read(header->keySequence, header->keyLength); - tempBuf = header->keySequence; - if (tempBuf.length() > header->keyLength) { tempBuf = tempBuf.substr(0, header->keyLength); strcpy(header->keySequence, tempBuf.c_str()); } + char* tempBuffer2 = new char[header.keyLength]; + in.read(&(*tempBuffer2), header.keyLength); + header.keySequence = tempBuffer2; + if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength); } + delete[] tempBuffer2; + + /* Pad to 8 chars */ + unsigned long int spotInFile = in.tellg(); + unsigned long int spot = (spotInFile + 7)& ~7; // ~ inverts + in.seekg(spot); }else{ m->mothurOut("Error reading sff common header."); m->mothurOutEndLine(); } - return header; + return 0; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "readCommonHeader"); @@ -262,66 +466,64 @@ CommonHeader* SffInfoCommand::readCommonHeader(ifstream& in){ } } //********************************************************************************************************************** -Header* SffInfoCommand::readHeader(ifstream& in){ +int SffInfoCommand::readHeader(ifstream& in, Header& header){ try { - Header* header = new Header(); if (!in.eof()) { - string tempBuf = ""; //read header length - char* buffer = new char(sizeof(header->headerLength)); - in.read(buffer, sizeof(header->headerLength)); - header->headerLength = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; + char buffer [2]; + in.read(buffer, 2); + header.headerLength = be_int2(*(unsigned short *)(&buffer)); //read name length - buffer = new char(sizeof(header->nameLength)); - in.read(buffer, sizeof(header->nameLength)); - header->nameLength = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; + char buffer2 [2]; + in.read(buffer2, 2); + header.nameLength = be_int2(*(unsigned short *)(&buffer2)); //read num bases - buffer = new char(sizeof(header->numBases)); - in.read(buffer, sizeof(header->numBases)); - header->numBases = be_int4(*(unsigned int *)(buffer)); - delete[] buffer; + char buffer3 [4]; + in.read(buffer3, 4); + header.numBases = be_int4(*(unsigned int *)(&buffer3)); //read clip qual left - buffer = new char(sizeof(header->clipQualLeft)); - in.read(buffer, sizeof(header->clipQualLeft)); - header->clipQualLeft = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; + char buffer4 [2]; + in.read(buffer4, 2); + header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4)); + header.clipQualLeft = 5; //read clip qual right - buffer = new char(sizeof(header->clipQualRight)); - in.read(buffer, sizeof(header->clipQualRight)); - header->clipQualRight = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; + char buffer5 [2]; + in.read(buffer5, 2); + header.clipQualRight = be_int2(*(unsigned short *)(&buffer5)); //read clipAdapterLeft - buffer = new char(sizeof(header->clipAdapterLeft)); - in.read(buffer, sizeof(header->clipAdapterLeft)); - header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; + char buffer6 [2]; + in.read(buffer6, 2); + header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6)); //read clipAdapterRight - buffer = new char(sizeof(header->clipAdapterRight)); - in.read(buffer, sizeof(header->clipAdapterRight)); - header->clipAdapterRight = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; - + char buffer7 [2]; + in.read(buffer7, 2); + header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7)); + //read name - header->name = new char(header->nameLength); - in.read(header->name, header->nameLength); - tempBuf = header->name; - if (tempBuf.length() > header->nameLength) { tempBuf = tempBuf.substr(0, header->nameLength); strcpy(header->name, tempBuf.c_str()); } - + char* tempBuffer = new char[header.nameLength]; + in.read(&(*tempBuffer), header.nameLength); + header.name = tempBuffer; + if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); } + delete[] tempBuffer; + + /* Pad to 8 chars */ + unsigned long int spotInFile = in.tellg(); + unsigned long int spot = (spotInFile + 7)& ~7; + in.seekg(spot); + }else{ m->mothurOut("Error reading sff header info."); m->mothurOutEndLine(); } - return header; + return 0; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "readHeader"); @@ -329,61 +531,52 @@ Header* SffInfoCommand::readHeader(ifstream& in){ } } //********************************************************************************************************************** -seqRead* SffInfoCommand::readSeqData(ifstream& in, int numFlowReads, int numBases){ +int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){ try { - seqRead* read = new seqRead(); if (!in.eof()) { - - string tempBuf = ""; - char* buffer; - + //read flowgram - read->flowgram.resize(numFlowReads); + read.flowgram.resize(numFlowReads); for (int i = 0; i < numFlowReads; i++) { - buffer = new char((sizeof(unsigned short))); - in.read(buffer, (sizeof(unsigned short))); - read->flowgram[i] = be_int2(*(unsigned short *)(buffer)); - delete[] buffer; + char buffer [2]; + in.read(buffer, 2); + read.flowgram[i] = be_int2(*(unsigned short *)(&buffer)); } - - //read flowgram - read->flowIndex.resize(numBases); + + //read flowIndex + read.flowIndex.resize(numBases); for (int i = 0; i < numBases; i++) { - buffer = new char(1); - in.read(buffer, 1); - read->flowgram[i] = be_int1(*(unsigned int *)(buffer)); - delete[] buffer; + char temp[1]; + in.read(temp, 1); + read.flowIndex[i] = be_int1(*(unsigned char *)(&temp)); } - + //read bases - read->bases = new char(numBases); - in.read(read->bases, numBases); - tempBuf = buffer; - if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str()); } - + char* tempBuffer = new char[numBases]; + in.read(&(*tempBuffer), numBases); + read.bases = tempBuffer; + if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases); } + delete[] tempBuffer; - //read flowgram - read->qualScores.resize(numBases); + //read qual scores + read.qualScores.resize(numBases); for (int i = 0; i < numBases; i++) { - buffer = new char(1); - in.read(buffer, 1); - read->qualScores[i] = be_int1(*(unsigned int *)(buffer)); - delete[] buffer; + char temp[1]; + in.read(temp, 1); + read.qualScores[i] = be_int1(*(unsigned char *)(&temp)); } - + /* Pad to 8 chars */ - int spotInFile = in.tellg(); - cout << spotInFile << endl; - int spot = floor((spotInFile + 7) /(float) 8) * 8; - cout << spot << endl; + unsigned long int spotInFile = in.tellg(); + unsigned long int spot = (spotInFile + 7)& ~7; in.seekg(spot); }else{ m->mothurOut("Error reading."); m->mothurOutEndLine(); } - return read; + return 0; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "readSeqData"); @@ -391,25 +584,20 @@ seqRead* SffInfoCommand::readSeqData(ifstream& in, int numFlowReads, int numBase } } //********************************************************************************************************************** -int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) { +int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) { try { - string output = "Common Header:\nMagic Number: "; - output += toString(header->magicNumber) + '\n'; - output += "Version: " + toString(header->version) + '\n'; - output += "Index Offset: " + toString(header->indexOffset) + '\n'; - output += "Index Length: " + toString(header->indexLength) + '\n'; - output += "Number of Reads: " + toString(header->numReads) + '\n'; - output += "Header Length: " + toString(header->headerLength) + '\n'; - output += "Key Length: " + toString(header->keyLength) + '\n'; - output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n'; - output += "Format Code: " + toString(header->flogramFormatCode) + '\n'; - output += "Flow Chars: " + toString(header->flowChars) + '\n'; - output += "Key Sequence: " + toString(header->keySequence) + '\n'; - - out << output << endl; - - if (debug) { cout << output << endl; } + out << "Common Header:\nMagic Number: " << header.magicNumber << endl; + out << "Version: " << header.version << endl; + out << "Index Offset: " << header.indexOffset << endl; + out << "Index Length: " << header.indexLength << endl; + out << "Number of Reads: " << header.numReads << endl; + out << "Header Length: " << header.headerLength << endl; + out << "Key Length: " << header.keyLength << endl; + out << "Number of Flows: " << header.numFlowsPerRead << endl; + out << "Format Code: " << header.flogramFormatCode << endl; + out << "Flow Chars: " << header.flowChars << endl; + out << "Key Sequence: " << header.keySequence << endl << endl; return 0; } @@ -419,21 +607,26 @@ int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool } } //********************************************************************************************************************** -int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) { +int SffInfoCommand::printHeader(ofstream& out, Header& header) { try { - string name = header->name; - string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n'; - output += "Name Length: " + toString(header->nameLength) + '\n'; - output += "Number of Bases: " + toString(header->numBases) + '\n'; - output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n'; - output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n'; - output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n'; - output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n'; - - out << output << endl; - - if (debug) { cout << output << endl; } - + + out << ">" << header.name << endl; + out << "Run Prefix: " << endl; + out << "Region #: " << endl; + out << "XY Location: " << endl << endl; + + out << "Run Name: " << endl; + out << "Analysis Name: " << endl; + out << "Full Path: " << endl << endl; + + out << "Read Header Len: " << header.headerLength << endl; + out << "Name Length: " << header.nameLength << endl; + out << "# of Bases: " << header.numBases << endl; + out << "Clip Qual Left: " << header.clipQualLeft << endl; + out << "Clip Qual Right: " << header.clipQualRight << endl; + out << "Clip Adap Left: " << header.clipAdapterLeft << endl; + out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl; + return 0; } catch(exception& e) { @@ -443,26 +636,142 @@ int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) { } //********************************************************************************************************************** -int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) { +int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) { + try { + + out << "Flowgram: "; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } + + out << endl << "Flow Indexes: "; + int sum = 0; + for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; } + + //make the bases you want to clip lowercase and the bases you want to keep upper case + if(header.clipQualRight == 0){ header.clipQualRight = read.bases.length(); } + for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); } + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { read.bases[i] = toupper(read.bases[i]); } + for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } + + out << endl << "Bases: " << read.bases << endl << "Quality Scores: "; + for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + + + out << endl << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData"); + exit(1); + } +} +//********************************************************************************************************************** +int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) { try { + + string seq = read.bases; + + if (trim) { + if(header.clipQualRight < header.clipQualLeft){ + seq = "NNNN"; + } + else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ + seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); + } + else { + seq = seq.substr(header.clipQualLeft-1); + } + }else{ + //if you wanted the sfftxt then you already converted the bases to the right case + if (!sfftxt) { + //make the bases you want to clip lowercase and the bases you want to keep upper case + if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } + for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } + for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } + } + } + + out << ">" << header.name << endl; + out << seq << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printFastaSeqData"); + exit(1); + } +} - string output = "FlowGram: "; - for (int i = 0; i < read->flowgram.size(); i++) { output += toString(read->flowgram[i]) +'\t'; } - output += "\nFlow Indexes: "; - for (int i = 0; i < read->flowIndex.size(); i++) { output += toString(read->flowIndex[i]) +'\t'; } - string bases = read->bases; - output += "\nBases: " + bases + '\n'; - for (int i = 0; i < read->qualScores.size(); i++) { output += toString(read->qualScores[i]) +'\t'; } - output += '\n'; +//********************************************************************************************************************** +int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) { + try { + + if (trim) { + if(header.clipQualRight < header.clipQualLeft){ + out << "0\t0\t0\t0"; + } + else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ + out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { out << read.qualScores[i] << '\t'; } + } + else{ + out << ">" << header.name << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + } + }else{ + out << ">" << header.name << " length=" << read.qualScores.size() << endl; + for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + } - out << output << endl; + out << endl; - if (debug) { cout << output << endl; } + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printQualSeqData"); + exit(1); + } +} + +//********************************************************************************************************************** +int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { + try { + + out << ">" << header.name << endl; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; } + out << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "printFlowSeqData"); + exit(1); + } +} +//********************************************************************************************************************** +int SffInfoCommand::readAccnosFile(string filename) { + try { + //remove old names + seqNames.clear(); + + ifstream in; + m->openInputFile(filename, in); + string name; + + while(!in.eof()){ + in >> name; m->gobble(in); + + seqNames.insert(name); + + if (m->control_pressed) { seqNames.clear(); break; } + } + in.close(); return 0; } catch(exception& e) { - m->errorOut(e, "SffInfoCommand", "printSeqData"); + m->errorOut(e, "SffInfoCommand", "readAccnosFile"); exit(1); } }