5 * Created by westcott on 7/7/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
13 //**********************************************************************************************************************
15 SffInfoCommand::SffInfoCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"sff","outputdir","inputdir", "outputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
31 //check to make sure all parameters are valid for command
32 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
33 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
36 //if the user changes the output directory command factory will send this info to us in the output parameter
37 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
42 sffFilename = validParameter.validFile(parameters, "sff", false);
43 if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; }
45 splitAtDash(sffFilename, filenames);
47 //go through files and make sure they are good, if not, then disregard them
48 for (int i = 0; i < filenames.size(); i++) {
50 string path = hasPath(filenames[i]);
51 //if the user has not given a path then, add inputdir. else leave path alone.
52 if (path == "") { filenames[i] = inputDir + filenames[i]; }
56 int ableToOpen = openInputFile(filenames[i], in);
59 if (ableToOpen == 1) {
60 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine();
61 //erase from file list
62 filenames.erase(filenames.begin()+i);
67 //make sure there is at least one valid file left
68 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
73 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
77 //**********************************************************************************************************************
79 void SffInfoCommand::help(){
81 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
83 m->mothurOut("Example sffinfo(sff=...).\n");
84 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
87 m->errorOut(e, "SffInfoCommand", "help");
91 //**********************************************************************************************************************
93 SffInfoCommand::~SffInfoCommand(){}
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
99 if (abort == true) { return 0; }
101 for (int s = 0; s < filenames.size(); s++) {
103 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
105 m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
107 if (outputDir == "") { outputDir += hasPath(filenames[s]); }
108 string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
110 extractSffInfo(filenames[s], outputFileName);
112 outputNames.push_back(outputFileName);
114 m->mothurOut("Done."); m->mothurOutEndLine();
117 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
119 //report output filenames
120 m->mothurOutEndLine();
121 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
122 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
123 m->mothurOutEndLine();
127 catch(exception& e) {
128 m->errorOut(e, "SffInfoCommand", "execute");
132 //**********************************************************************************************************************
133 int SffInfoCommand::extractSffInfo(string input, string output){
136 openOutputFile(output, out);
139 in.open(input.c_str(), ios::binary);
141 CommonHeader* header = readCommonHeader(in);
143 //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
144 //cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;
145 cout << in.tellg() << endl;
146 //read through the sff file
150 Header* readheader = readHeader(in);
153 seqRead* read = readSeqData(in, header->numFlowsPerRead, readheader->numBases);
155 cout << in.tellg() << endl;
157 //print common header
158 printCommonHeader(out, header, true);
161 printHeader(out, readheader, true);
164 printSeqData(out, read, true);
174 catch(exception& e) {
175 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
179 //**********************************************************************************************************************
180 CommonHeader* SffInfoCommand::readCommonHeader(ifstream& in){
182 CommonHeader* header = new CommonHeader();
188 char* buffer = new char(sizeof(header->magicNumber));
189 in.read(buffer, sizeof(header->magicNumber));
190 header->magicNumber = be_int4(*(uint32_t *)(buffer));
194 header->version = new char(4);
195 in.read(header->version, 4);
197 if (tempBuf.length() > 4) { tempBuf = tempBuf.substr(0, 4); strcpy(header->version, tempBuf.c_str()); }
200 buffer = new char(sizeof(header->indexOffset));
201 in.read(buffer, sizeof(header->indexOffset));
202 header->indexOffset = be_int8(*(uint64_t *)(buffer));
206 buffer = new char(sizeof(header->indexLength));
207 in.read(buffer, sizeof(header->indexLength));
208 header->indexLength = be_int4(*(uint32_t *)(buffer));
212 buffer = new char(sizeof(header->numReads));
213 in.read(buffer, sizeof(header->numReads));
214 header->numReads = be_int4(*(uint32_t *)(buffer));
218 buffer = new char(sizeof(header->headerLength));
219 in.read(buffer, sizeof(header->headerLength));
220 header->headerLength = be_int2(*(uint16_t *)(buffer));
224 buffer = new char(sizeof(header->keyLength));
225 in.read(buffer, sizeof(header->keyLength));
226 header->keyLength = be_int2(*(uint16_t *)(buffer));
229 //read number of flow reads
230 buffer = new char(sizeof(header->numFlowsPerRead));
231 in.read(buffer, sizeof(header->numFlowsPerRead));
232 header->numFlowsPerRead = be_int2(*(uint16_t *)(buffer));
236 buffer = new char(sizeof(header->flogramFormatCode));
237 in.read(buffer, sizeof(header->flogramFormatCode));
238 header->flogramFormatCode = be_int1(*(uint8_t *)(buffer));
242 header->flowChars = new char(header->numFlowsPerRead);
243 in.read(header->flowChars, header->numFlowsPerRead);
245 if (tempBuf.length() > header->numFlowsPerRead) { tempBuf = tempBuf.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf.c_str()); }
248 header->keySequence = new char(header->keyLength);
249 in.read(header->keySequence, header->keyLength);
250 tempBuf = header->keySequence;
251 if (tempBuf.length() > header->keyLength) { tempBuf = tempBuf.substr(0, header->keyLength); strcpy(header->keySequence, tempBuf.c_str()); }
254 m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
259 catch(exception& e) {
260 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
264 //**********************************************************************************************************************
265 Header* SffInfoCommand::readHeader(ifstream& in){
267 Header* header = new Header();
273 char* buffer = new char(sizeof(header->headerLength));
274 in.read(buffer, sizeof(header->headerLength));
275 header->headerLength = be_int2(*(unsigned short *)(buffer));
279 buffer = new char(sizeof(header->nameLength));
280 in.read(buffer, sizeof(header->nameLength));
281 header->nameLength = be_int2(*(unsigned short *)(buffer));
285 buffer = new char(sizeof(header->numBases));
286 in.read(buffer, sizeof(header->numBases));
287 header->numBases = be_int4(*(unsigned int *)(buffer));
290 //read clip qual left
291 buffer = new char(sizeof(header->clipQualLeft));
292 in.read(buffer, sizeof(header->clipQualLeft));
293 header->clipQualLeft = be_int2(*(unsigned short *)(buffer));
296 //read clip qual right
297 buffer = new char(sizeof(header->clipQualRight));
298 in.read(buffer, sizeof(header->clipQualRight));
299 header->clipQualRight = be_int2(*(unsigned short *)(buffer));
302 //read clipAdapterLeft
303 buffer = new char(sizeof(header->clipAdapterLeft));
304 in.read(buffer, sizeof(header->clipAdapterLeft));
305 header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
308 //read clipAdapterRight
309 buffer = new char(sizeof(header->clipAdapterRight));
310 in.read(buffer, sizeof(header->clipAdapterRight));
311 header->clipAdapterRight = be_int2(*(unsigned short *)(buffer));
315 header->name = new char(header->nameLength);
316 in.read(header->name, header->nameLength);
317 tempBuf = header->name;
318 if (tempBuf.length() > header->nameLength) { tempBuf = tempBuf.substr(0, header->nameLength); strcpy(header->name, tempBuf.c_str()); }
321 m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
326 catch(exception& e) {
327 m->errorOut(e, "SffInfoCommand", "readHeader");
331 //**********************************************************************************************************************
332 seqRead* SffInfoCommand::readSeqData(ifstream& in, int numFlowReads, int numBases){
334 seqRead* read = new seqRead();
342 read->flowgram.resize(numFlowReads);
343 for (int i = 0; i < numFlowReads; i++) {
344 buffer = new char((sizeof(unsigned short)));
345 in.read(buffer, (sizeof(unsigned short)));
346 read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
351 read->flowIndex.resize(numBases);
352 for (int i = 0; i < numBases; i++) {
353 buffer = new char(1);
355 read->flowgram[i] = be_int1(*(unsigned int *)(buffer));
360 read->bases = new char(numBases);
361 in.read(read->bases, numBases);
363 if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str()); }
367 read->qualScores.resize(numBases);
368 for (int i = 0; i < numBases; i++) {
369 buffer = new char(1);
371 read->qualScores[i] = be_int1(*(unsigned int *)(buffer));
376 int spotInFile = in.tellg();
377 cout << spotInFile << endl;
378 int spot = floor((spotInFile + 7) /(float) 8) * 8;
379 cout << spot << endl;
383 m->mothurOut("Error reading."); m->mothurOutEndLine();
388 catch(exception& e) {
389 m->errorOut(e, "SffInfoCommand", "readSeqData");
393 //**********************************************************************************************************************
394 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
397 string output = "Common Header:\nMagic Number: ";
398 output += toString(header->magicNumber) + '\n';
399 output += "Version: " + toString(header->version) + '\n';
400 output += "Index Offset: " + toString(header->indexOffset) + '\n';
401 output += "Index Length: " + toString(header->indexLength) + '\n';
402 output += "Number of Reads: " + toString(header->numReads) + '\n';
403 output += "Header Length: " + toString(header->headerLength) + '\n';
404 output += "Key Length: " + toString(header->keyLength) + '\n';
405 output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
406 output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
407 output += "Flow Chars: " + toString(header->flowChars) + '\n';
408 output += "Key Sequence: " + toString(header->keySequence) + '\n';
410 out << output << endl;
412 if (debug) { cout << output << endl; }
416 catch(exception& e) {
417 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
421 //**********************************************************************************************************************
422 int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
424 string name = header->name;
425 string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
426 output += "Name Length: " + toString(header->nameLength) + '\n';
427 output += "Number of Bases: " + toString(header->numBases) + '\n';
428 output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
429 output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
430 output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
431 output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
433 out << output << endl;
435 if (debug) { cout << output << endl; }
439 catch(exception& e) {
440 m->errorOut(e, "SffInfoCommand", "printHeader");
445 //**********************************************************************************************************************
446 int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
449 string output = "FlowGram: ";
450 for (int i = 0; i < read->flowgram.size(); i++) { output += toString(read->flowgram[i]) +'\t'; }
451 output += "\nFlow Indexes: ";
452 for (int i = 0; i < read->flowIndex.size(); i++) { output += toString(read->flowIndex[i]) +'\t'; }
453 string bases = read->bases;
454 output += "\nBases: " + bases + '\n';
455 for (int i = 0; i < read->qualScores.size(); i++) { output += toString(read->qualScores[i]) +'\t'; }
458 out << output << endl;
460 if (debug) { cout << output << endl; }
464 catch(exception& e) {
465 m->errorOut(e, "SffInfoCommand", "printSeqData");
469 //**********************************************************************************************************************/