5 * Created by westcott on 7/7/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
13 //**********************************************************************************************************************
15 SffInfoCommand::SffInfoCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"sff","outputdir","inputdir", "outputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
31 //check to make sure all parameters are valid for command
32 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
33 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
36 //if the user changes the output directory command factory will send this info to us in the output parameter
37 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
42 sffFilename = validParameter.validFile(parameters, "sff", false);
43 if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; }
45 splitAtDash(sffFilename, filenames);
47 //go through files and make sure they are good, if not, then disregard them
48 for (int i = 0; i < filenames.size(); i++) {
50 string path = hasPath(filenames[i]);
51 //if the user has not given a path then, add inputdir. else leave path alone.
52 if (path == "") { filenames[i] = inputDir + filenames[i]; }
56 int ableToOpen = openInputFile(filenames[i], in);
59 if (ableToOpen == 1) {
60 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine();
61 //erase from file list
62 filenames.erase(filenames.begin()+i);
67 //make sure there is at least one valid file left
68 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
73 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
77 //**********************************************************************************************************************
79 void SffInfoCommand::help(){
81 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
83 m->mothurOut("Example sffinfo(sff=...).\n");
84 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
87 m->errorOut(e, "SffInfoCommand", "help");
91 //**********************************************************************************************************************
93 SffInfoCommand::~SffInfoCommand(){}
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
99 if (abort == true) { return 0; }
101 for (int s = 0; s < filenames.size(); s++) {
103 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
105 m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
107 if (outputDir == "") { outputDir += hasPath(filenames[s]); }
108 string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
110 extractSffInfo(filenames[s], outputFileName);
112 outputNames.push_back(outputFileName);
114 m->mothurOut("Done."); m->mothurOutEndLine();
117 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
119 //report output filenames
120 m->mothurOutEndLine();
121 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
122 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
123 m->mothurOutEndLine();
127 catch(exception& e) {
128 m->errorOut(e, "SffInfoCommand", "execute");
132 //**********************************************************************************************************************
133 int SffInfoCommand::extractSffInfo(string input, string output){
136 openOutputFile(output, out);
139 in.open(input.c_str(), ios::binary);
141 CommonHeader* header = new CommonHeader();
142 readCommonHeader(in, header);
144 cout << strlen(header->flowChars) << endl;
145 //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
146 //cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;
147 cout << in.tellg() << endl;
148 //read through the sff file
150 //print common header
151 printCommonHeader(out, header, true);
154 Header* readheader = new Header();
155 readHeader(in, readheader);
157 cout << in.tellg() << endl;
160 printHeader(out, readheader, true);
163 seqRead* read = new seqRead();
164 readSeqData(in, read, header->numFlowsPerRead, readheader->numBases);
166 cout << in.tellg() << endl;
169 printSeqData(out, read, true);
179 catch(exception& e) {
180 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
184 //**********************************************************************************************************************
185 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader*& header){
191 char* buffer = new char(sizeof(header->magicNumber));
192 in.read(buffer, sizeof(header->magicNumber));
193 header->magicNumber = be_int4(*(unsigned int *)(buffer));
195 //cout << "here " << header->magicNumber << '\t' << in.tellg() << endl;
197 header->version = new char(4);
198 in.read(header->version, 4);
199 string tempBuf0 = header->version;
200 if (tempBuf0.length() > 4) { tempBuf0 = tempBuf0.substr(0, 4); strcpy(header->version, tempBuf0.c_str()); }
201 //memcpy(header->version, buffer+4, 4);
202 //cout << "here " << header->version << '\t' << in.tellg() << endl;
204 buffer = new char(sizeof(header->indexOffset));
205 in.read(buffer, sizeof(header->indexOffset));
206 header->indexOffset = be_int8(*(unsigned long int *)(buffer));
208 //cout << "here " << header->indexOffset << '\t' << in.tellg() << endl;
210 buffer = new char(sizeof(header->indexLength));
211 in.read(buffer, sizeof(header->indexLength));
212 header->indexLength = be_int4(*(unsigned int *)(buffer));
214 //cout << "here " << header->indexLength << '\t' << in.tellg() << endl;
216 buffer = new char(sizeof(header->numReads));
217 in.read(buffer, sizeof(header->numReads));
218 header->numReads = be_int4(*(unsigned int *)(buffer));
220 //cout << "here " << header->numReads << '\t' << in.tellg() << endl;
222 buffer = new char(sizeof(header->headerLength));
223 in.read(buffer, sizeof(header->headerLength));
224 header->headerLength = be_int2(*(unsigned short *)(buffer));
226 //cout << "here " << header->headerLength << '\t' << in.tellg() << endl;
228 buffer = new char(sizeof(header->keyLength));
229 in.read(buffer, sizeof(header->keyLength));
230 header->keyLength = be_int2(*(unsigned short *)(buffer));
233 //cout << "here " << header->keyLength << '\t' << in.tellg() << endl;
234 //read number of flow reads
235 buffer = new char(sizeof(header->numFlowsPerRead));
236 in.read(buffer, sizeof(header->numFlowsPerRead));
237 header->numFlowsPerRead = be_int2(*(unsigned short *)(buffer));
239 //cout << "here " << header->numFlowsPerRead << '\t' << in.tellg() << endl;
241 buffer = new char(sizeof(header->flogramFormatCode));
242 in.read(buffer, sizeof(header->flogramFormatCode));
243 header->flogramFormatCode = be_int1(*(char *)(buffer));
245 //cout << "here " << header->flogramFormatCode << '\t' << in.tellg() << endl;
248 //header->numFlowsPerRead = 800;
249 header->flowChars = new char(header->numFlowsPerRead);
250 buffer = new char(header->numFlowsPerRead);
251 //cout << "here" << endl;
252 //in.read(header->flowChars, header->numFlowsPerRead);
253 in.read(buffer, header->numFlowsPerRead);
254 memcpy(header->flowChars, buffer, header->numFlowsPerRead);
256 //cout << "here" << endl;
257 //string tempBuf1 = header->flowChars;
258 //cout << "here " << in.tellg() << endl;
259 //if (tempBuf1.length() > header->numFlowsPerRead) { tempBuf1 = tempBuf1.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf1.c_str()); }
261 // cout << "here " << header->flowChars << '\t' << in.tellg() << endl;
263 //header->keyLength = 4;
264 //char* myAlloc2 = new char(4); cout << "alloced" << endl;
265 header->keySequence = new char(header->keyLength);
266 //char* myAlloc = new char(4);
267 // cout << "here " << endl;
268 in.read(header->keySequence, header->keyLength);
269 string tempBuf2 = header->keySequence;
270 if (tempBuf2.length() > header->keyLength) { tempBuf2 = tempBuf2.substr(0, header->keyLength); strcpy(header->keySequence, tempBuf2.c_str()); }
271 //cout << "here " << header->keySequence << '\t' << in.tellg() << endl;
274 int spotInFile = in.tellg();
275 //cout << spotInFile << endl;
276 int spot = floor((spotInFile + 7) /(float) 8) * 8;
277 //cout << spot << endl;
283 m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
288 catch(exception& e) {
289 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
293 //**********************************************************************************************************************
294 int SffInfoCommand::readHeader(ifstream& in, Header*& header){
301 char* buffer = new char(sizeof(header->headerLength));
302 in.read(buffer, sizeof(header->headerLength));
303 header->headerLength = be_int2(*(unsigned short *)(buffer));
307 buffer = new char(sizeof(header->nameLength));
308 in.read(buffer, sizeof(header->nameLength));
309 header->nameLength = be_int2(*(unsigned short *)(buffer));
313 buffer = new char(sizeof(header->numBases));
314 in.read(buffer, sizeof(header->numBases));
315 header->numBases = be_int4(*(unsigned int *)(buffer));
318 //read clip qual left
319 buffer = new char(sizeof(header->clipQualLeft));
320 in.read(buffer, sizeof(header->clipQualLeft));
321 header->clipQualLeft = be_int2(*(unsigned short *)(buffer));
324 //read clip qual right
325 buffer = new char(sizeof(header->clipQualRight));
326 in.read(buffer, sizeof(header->clipQualRight));
327 header->clipQualRight = be_int2(*(unsigned short *)(buffer));
330 //read clipAdapterLeft
331 buffer = new char(sizeof(header->clipAdapterLeft));
332 in.read(buffer, sizeof(header->clipAdapterLeft));
333 header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
336 //read clipAdapterRight
337 buffer = new char(sizeof(header->clipAdapterRight));
338 in.read(buffer, sizeof(header->clipAdapterRight));
339 header->clipAdapterRight = be_int2(*(unsigned short *)(buffer));
343 header->name = new char(header->nameLength);
344 //buffer = new char(header->nameLength);
345 in.read(header->name, header->nameLength);
346 //memcpy(header->name, buffer, header->nameLength);
350 m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
355 catch(exception& e) {
356 m->errorOut(e, "SffInfoCommand", "readHeader");
360 //**********************************************************************************************************************
361 int SffInfoCommand::readSeqData(ifstream& in, seqRead*& read, int numFlowReads, int numBases){
370 read->flowgram.resize(numFlowReads);
371 for (int i = 0; i < numFlowReads; i++) {
372 buffer = new char((sizeof(unsigned short)));
373 in.read(buffer, (sizeof(unsigned short)));
374 read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
379 read->flowIndex.resize(numBases);
380 for (int i = 0; i < numBases; i++) {
381 buffer = new char(1);
383 read->flowgram[i] = be_int1(*(unsigned int *)(buffer));
388 read->bases = new char(numBases);
389 in.read(read->bases, numBases);
391 if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str()); }
395 read->qualScores.resize(numBases);
396 for (int i = 0; i < numBases; i++) {
397 buffer = new char(1);
399 read->qualScores[i] = be_int1(*(unsigned int *)(buffer));
404 int spotInFile = in.tellg();
405 cout << spotInFile << endl;
406 int spot = floor((spotInFile + 7) /(float) 8) * 8;
407 cout << spot << endl;
411 m->mothurOut("Error reading."); m->mothurOutEndLine();
416 catch(exception& e) {
417 m->errorOut(e, "SffInfoCommand", "readSeqData");
421 //**********************************************************************************************************************
422 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
425 string output = "Common Header:\nMagic Number: ";
426 output += toString(header->magicNumber) + '\n';
427 output += "Version: " + toString(header->version) + '\n';
428 output += "Index Offset: " + toString(header->indexOffset) + '\n';
429 output += "Index Length: " + toString(header->indexLength) + '\n';
430 output += "Number of Reads: " + toString(header->numReads) + '\n';
431 output += "Header Length: " + toString(header->headerLength) + '\n';
432 output += "Key Length: " + toString(header->keyLength) + '\n';
433 output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
434 output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
435 output += "Flow Chars: " + toString(header->flowChars) + '\n';
436 output += "Key Sequence: " + toString(header->keySequence) + '\n';
438 out << output << endl;
440 if (debug) { cout << output << endl; }
444 catch(exception& e) {
445 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
449 //**********************************************************************************************************************
450 int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
452 string name = header->name;
453 string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
454 output += "Name Length: " + toString(header->nameLength) + '\n';
455 output += "Number of Bases: " + toString(header->numBases) + '\n';
456 output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
457 output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
458 output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
459 output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
461 out << output << endl;
463 if (debug) { cout << output << endl; }
467 catch(exception& e) {
468 m->errorOut(e, "SffInfoCommand", "printHeader");
473 //**********************************************************************************************************************
474 int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
477 string output = "FlowGram: ";
478 for (int i = 0; i < read->flowgram.size(); i++) { output += toString(read->flowgram[i]) +'\t'; }
479 output += "\nFlow Indexes: ";
480 for (int i = 0; i < read->flowIndex.size(); i++) { output += toString(read->flowIndex[i]) +'\t'; }
481 string bases = read->bases;
482 output += "\nBases: " + bases + '\n';
483 for (int i = 0; i < read->qualScores.size(); i++) { output += toString(read->qualScores[i]) +'\t'; }
486 out << output << endl;
488 if (debug) { cout << output << endl; }
492 catch(exception& e) {
493 m->errorOut(e, "SffInfoCommand", "printSeqData");
497 //**********************************************************************************************************************/