5 * Created by westcott on 7/7/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
13 //**********************************************************************************************************************
15 SffInfoCommand::SffInfoCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"sff","outputdir","inputdir", "outputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
31 //check to make sure all parameters are valid for command
32 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
33 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
36 //if the user changes the output directory command factory will send this info to us in the output parameter
37 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
42 sffFilename = validParameter.validFile(parameters, "sff", false);
43 if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; }
45 splitAtDash(sffFilename, filenames);
47 //go through files and make sure they are good, if not, then disregard them
48 for (int i = 0; i < filenames.size(); i++) {
50 string path = hasPath(filenames[i]);
51 //if the user has not given a path then, add inputdir. else leave path alone.
52 if (path == "") { filenames[i] = inputDir + filenames[i]; }
56 int ableToOpen = openInputFile(filenames[i], in);
59 if (ableToOpen == 1) {
60 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine();
61 //erase from file list
62 filenames.erase(filenames.begin()+i);
67 //make sure there is at least one valid file left
68 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
73 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
77 //**********************************************************************************************************************
79 void SffInfoCommand::help(){
81 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
83 m->mothurOut("Example sffinfo(sff=...).\n");
84 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
87 m->errorOut(e, "SffInfoCommand", "help");
91 //**********************************************************************************************************************
93 SffInfoCommand::~SffInfoCommand(){}
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
99 if (abort == true) { return 0; }
101 for (int s = 0; s < filenames.size(); s++) {
103 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
105 m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
107 if (outputDir == "") { outputDir += hasPath(filenames[s]); }
108 string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
110 extractSffInfo(filenames[s], outputFileName);
112 outputNames.push_back(outputFileName);
114 m->mothurOut("Done."); m->mothurOutEndLine();
117 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
119 //report output filenames
120 m->mothurOutEndLine();
121 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
122 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
123 m->mothurOutEndLine();
127 catch(exception& e) {
128 m->errorOut(e, "SffInfoCommand", "execute");
132 //**********************************************************************************************************************
133 int SffInfoCommand::extractSffInfo(string input, string output){
136 openOutputFile(output, out);
139 in.open(input.c_str(), ios::binary);
141 CommonHeader* header = new CommonHeader();
142 readCommonHeader(in, header);
144 cout << strlen(header->flowChars) << endl;
145 //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
146 //cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;
147 cout << in.tellg() << endl;
148 //read through the sff file
150 //print common header
151 printCommonHeader(out, header, true);
154 Header* readheader = new Header();
155 readHeader(in, readheader);
157 cout << in.tellg() << endl;
160 printHeader(out, readheader, true);
163 seqRead* read = new seqRead();
164 readSeqData(in, read, header->numFlowsPerRead, readheader->numBases);
166 cout << in.tellg() << endl;
169 printSeqData(out, read, true);
180 catch(exception& e) {
181 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
185 //**********************************************************************************************************************
186 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader* header){
192 char buffer[sizeof(header->magicNumber)];
193 in.read(buffer, sizeof(header->magicNumber));
194 header->magicNumber = be_int4(*(unsigned int *)(&buffer));
196 cout << "here " << header->magicNumber << '\t' << in.tellg() << endl;
198 header->version = new char(4);
199 in.read(header->version, 4);
200 string tempBuf0 = header->version; //this is in here because the read sometimes tacks on extra chars, not sure why?
201 if (tempBuf0.length() > 4) { tempBuf0 = tempBuf0.substr(0, 4); strcpy(header->version, tempBuf0.c_str()); }
202 //memcpy(header->version, buffer+4, 4);
203 cout << "here " << header->version << '\t' << in.tellg() << endl;
205 char buffer2 [sizeof(header->indexOffset)];
206 in.read(buffer2, sizeof(header->indexOffset));
207 header->indexOffset = be_int8(*(unsigned long int *)(&buffer2));
209 cout << "here " << header->indexOffset << '\t' << in.tellg() << endl;
211 char buffer3 [sizeof(header->indexLength)];
212 in.read(buffer3, sizeof(header->indexLength));
213 header->indexLength = be_int4(*(unsigned int *)(&buffer3));
215 cout << "here " << header->indexLength << '\t' << in.tellg() << endl;
217 char buffer4 [sizeof(header->numReads)];
218 in.read(buffer4, sizeof(header->numReads));
219 header->numReads = be_int4(*(unsigned int *)(&buffer4));
221 cout << "here " << header->numReads << '\t' << in.tellg() << endl;
223 char buffer5 [sizeof(header->headerLength)];
224 in.read(buffer5, sizeof(header->headerLength));
225 header->headerLength = be_int2(*(unsigned short *)(&buffer5));
227 cout << "here " << header->headerLength << '\t' << in.tellg() << endl;
229 char buffer6 [sizeof(header->keyLength)];
230 in.read(buffer6, sizeof(header->keyLength));
231 header->keyLength = be_int2(*(unsigned short *)(&buffer6));
234 cout << "here " << header->keyLength << '\t' << in.tellg() << endl;
235 //read number of flow reads
236 char buffer7 [sizeof(header->numFlowsPerRead)];
237 in.read(buffer7, sizeof(header->numFlowsPerRead));
238 header->numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7));
240 cout << "here " << header->numFlowsPerRead << '\t' << in.tellg() << endl;
242 //buffer = new char(sizeof(header->flogramFormatCode));
243 in.read(&header->flogramFormatCode, 1);
244 header->flogramFormatCode = be_int1(*(char *)(&header->flogramFormatCode));
246 cout << "here " << header->flogramFormatCode << '\t' << in.tellg() << endl;
249 //header->numFlowsPerRead = 800;
250 header->flowChars = new char(header->numFlowsPerRead);
251 //buffer = new char(header->numFlowsPerRead);
252 cout << "here" << endl;
253 in.read(header->flowChars, header->numFlowsPerRead);
254 //string tempBuf1 = header->flowChars + '\0'; //this is in here because the read sometimes tacks on extra chars, not sure why?
255 //if (tempBuf0.length() > header->numFlowsPerRead) { tempBuf1 = tempBuf1.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf1.c_str()); }
256 //in.read(buffer, header->numFlowsPerRead);
257 //memcpy(header->flowChars, buffer, header->numFlowsPerRead);
259 //cout << "here" << endl;
260 //string tempBuf1 = header->flowChars;
261 //cout << "here " << in.tellg() << endl;
262 //if (tempBuf1.length() > header->numFlowsPerRead) { tempBuf1 = tempBuf1.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf1.c_str()); }
264 cout << "here " << header->flowChars << '\t' << in.tellg() << endl;
266 //header->keyLength = 4;
267 //char* myAlloc2 = new char(4); cout << "alloced" << endl;
268 header->keySequence = new char(header->keyLength);
269 //char* myAlloc = new char(4);
270 cout << "here " << endl;
271 in.read(header->keySequence, header->keyLength);
272 string tempBuf2 = header->keySequence;
273 if (tempBuf2.length() > header->keyLength) { tempBuf2 = tempBuf2.substr(0, header->keyLength); strcpy(header->keySequence, tempBuf2.c_str()); }
274 cout << "here " << header->keySequence << '\t' << in.tellg() << endl;
277 int spotInFile = in.tellg();
278 //cout << spotInFile << endl;
279 int spot = floor((spotInFile + 7) /(float) 8) * 8;
280 //cout << spot << endl;
286 m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
291 catch(exception& e) {
292 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
296 //**********************************************************************************************************************
297 int SffInfoCommand::readHeader(ifstream& in, Header* header){
304 char* buffer = new char(sizeof(header->headerLength));
305 in.read(buffer, sizeof(header->headerLength));
306 header->headerLength = be_int2(*(unsigned short *)(buffer));
310 buffer = new char(sizeof(header->nameLength));
311 in.read(buffer, sizeof(header->nameLength));
312 header->nameLength = be_int2(*(unsigned short *)(buffer));
316 buffer = new char(sizeof(header->numBases));
317 in.read(buffer, sizeof(header->numBases));
318 header->numBases = be_int4(*(unsigned int *)(buffer));
321 //read clip qual left
322 buffer = new char(sizeof(header->clipQualLeft));
323 in.read(buffer, sizeof(header->clipQualLeft));
324 header->clipQualLeft = be_int2(*(unsigned short *)(buffer));
327 //read clip qual right
328 buffer = new char(sizeof(header->clipQualRight));
329 in.read(buffer, sizeof(header->clipQualRight));
330 header->clipQualRight = be_int2(*(unsigned short *)(buffer));
333 //read clipAdapterLeft
334 buffer = new char(sizeof(header->clipAdapterLeft));
335 in.read(buffer, sizeof(header->clipAdapterLeft));
336 header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
339 //read clipAdapterRight
340 buffer = new char(sizeof(header->clipAdapterRight));
341 in.read(buffer, sizeof(header->clipAdapterRight));
342 header->clipAdapterRight = be_int2(*(unsigned short *)(buffer));
346 header->name = new char(header->nameLength);
347 //buffer = new char(header->nameLength);
348 in.read(header->name, header->nameLength);
349 //memcpy(header->name, buffer, header->nameLength);
353 m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
358 catch(exception& e) {
359 m->errorOut(e, "SffInfoCommand", "readHeader");
363 //**********************************************************************************************************************
364 int SffInfoCommand::readSeqData(ifstream& in, seqRead* read, int numFlowReads, int numBases){
373 read->flowgram.resize(numFlowReads);
374 for (int i = 0; i < numFlowReads; i++) {
375 buffer = new char((sizeof(unsigned short)));
376 in.read(buffer, (sizeof(unsigned short)));
377 read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
382 read->flowIndex.resize(numBases);
383 for (int i = 0; i < numBases; i++) {
384 //buffer = new char(1);
387 read->flowgram[i] = be_int1(*(unsigned int *)(&temp));
392 read->bases = new char(numBases);
393 in.read(read->bases, numBases);
395 if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str()); }
398 read->qualScores.resize(numBases);
399 for (int i = 0; i < numBases; i++) {
402 read->qualScores[i] = be_int1(*(unsigned int *)(&temp));
406 int spotInFile = in.tellg();
407 cout << spotInFile << endl;
408 int spot = floor((spotInFile + 7) /(float) 8) * 8;
409 cout << spot << endl;
413 m->mothurOut("Error reading."); m->mothurOutEndLine();
418 catch(exception& e) {
419 m->errorOut(e, "SffInfoCommand", "readSeqData");
423 //**********************************************************************************************************************
424 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
427 string output = "Common Header:\nMagic Number: ";
428 output += toString(header->magicNumber) + '\n';
429 output += "Version: " + toString(header->version) + '\n';
430 output += "Index Offset: " + toString(header->indexOffset) + '\n';
431 output += "Index Length: " + toString(header->indexLength) + '\n';
432 output += "Number of Reads: " + toString(header->numReads) + '\n';
433 output += "Header Length: " + toString(header->headerLength) + '\n';
434 output += "Key Length: " + toString(header->keyLength) + '\n';
435 output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
436 output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
437 output += "Flow Chars: " + toString(header->flowChars) + '\n';
438 output += "Key Sequence: " + toString(header->keySequence) + '\n';
440 out << output << endl;
442 if (debug) { cout << output << endl; }
446 catch(exception& e) {
447 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
451 //**********************************************************************************************************************
452 int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
454 string name = header->name;
455 string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
456 output += "Name Length: " + toString(header->nameLength) + '\n';
457 output += "Number of Bases: " + toString(header->numBases) + '\n';
458 output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
459 output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
460 output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
461 output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
463 out << output << endl;
465 if (debug) { cout << output << endl; }
469 catch(exception& e) {
470 m->errorOut(e, "SffInfoCommand", "printHeader");
475 //**********************************************************************************************************************
476 int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
479 string output = "FlowGram: ";
480 for (int i = 0; i < read->flowgram.size(); i++) { output += toString(read->flowgram[i]) +'\t'; }
481 output += "\nFlow Indexes: ";
482 for (int i = 0; i < read->flowIndex.size(); i++) { output += toString(read->flowIndex[i]) +'\t'; }
483 string bases = read->bases;
484 output += "\nBases: " + bases + '\n';
485 for (int i = 0; i < read->qualScores.size(); i++) { output += toString(read->qualScores[i]) +'\t'; }
488 out << output << endl;
490 if (debug) { cout << output << endl; }
494 catch(exception& e) {
495 m->errorOut(e, "SffInfoCommand", "printSeqData");
499 //**********************************************************************************************************************/