5 * Created by westcott on 7/7/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
13 //**********************************************************************************************************************
15 SffInfoCommand::SffInfoCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"sff","outputdir","inputdir", "outputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
31 //check to make sure all parameters are valid for command
32 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
33 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
36 //if the user changes the output directory command factory will send this info to us in the output parameter
37 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
42 sffFilename = validParameter.validFile(parameters, "sff", false);
43 if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; }
45 splitAtDash(sffFilename, filenames);
47 //go through files and make sure they are good, if not, then disregard them
48 for (int i = 0; i < filenames.size(); i++) {
50 string path = hasPath(filenames[i]);
51 //if the user has not given a path then, add inputdir. else leave path alone.
52 if (path == "") { filenames[i] = inputDir + filenames[i]; }
56 int ableToOpen = openInputFile(filenames[i], in);
59 if (ableToOpen == 1) {
60 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine();
61 //erase from file list
62 filenames.erase(filenames.begin()+i);
67 //make sure there is at least one valid file left
68 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
73 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
77 //**********************************************************************************************************************
79 void SffInfoCommand::help(){
81 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
83 m->mothurOut("Example sffinfo(sff=...).\n");
84 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
87 m->errorOut(e, "SffInfoCommand", "help");
91 //**********************************************************************************************************************
93 SffInfoCommand::~SffInfoCommand(){}
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
99 if (abort == true) { return 0; }
101 for (int s = 0; s < filenames.size(); s++) {
103 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
105 int start = time(NULL);
107 m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
109 if (outputDir == "") { outputDir += hasPath(filenames[s]); }
110 string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
112 int numReads = extractSffInfo(filenames[s], outputFileName);
114 outputNames.push_back(outputFileName);
116 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
119 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
121 //report output filenames
122 m->mothurOutEndLine();
123 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
124 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
125 m->mothurOutEndLine();
129 catch(exception& e) {
130 m->errorOut(e, "SffInfoCommand", "execute");
134 //**********************************************************************************************************************
135 int SffInfoCommand::extractSffInfo(string input, string output){
139 openOutputFile(output, out);
141 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
144 in.open(input.c_str(), ios::binary);
147 readCommonHeader(in, header);
149 //print common header
150 printCommonHeader(out, header);
154 //check magic number and version
155 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
156 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
158 //read through the sff file
163 readHeader(in, readheader);
166 printHeader(out, readheader);
170 readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
173 printSeqData(out, read);
178 if((count+1) % 500 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
180 if (m->control_pressed) { count = 0; break; }
182 if (count >= header.numReads) { break; }
186 if (!m->control_pressed) { if((count) % 500 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
193 catch(exception& e) {
194 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
198 //**********************************************************************************************************************
199 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
205 char buffer[sizeof(header.magicNumber)];
206 in.read(buffer, sizeof(header.magicNumber));
207 header.magicNumber = be_int4(*(unsigned int *)(&buffer));
213 for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); }
216 char buffer2 [sizeof(header.indexOffset)];
217 in.read(buffer2, sizeof(header.indexOffset));
218 header.indexOffset = be_int8(*(unsigned long int *)(&buffer2));
221 char buffer3 [sizeof(header.indexLength)];
222 in.read(buffer3, sizeof(header.indexLength));
223 header.indexLength = be_int4(*(unsigned int *)(&buffer3));
226 char buffer4 [sizeof(header.numReads)];
227 in.read(buffer4, sizeof(header.numReads));
228 header.numReads = be_int4(*(unsigned int *)(&buffer4));
231 char buffer5 [sizeof(header.headerLength)];
232 in.read(buffer5, sizeof(header.headerLength));
233 header.headerLength = be_int2(*(unsigned short *)(&buffer5));
236 char buffer6 [sizeof(header.keyLength)];
237 in.read(buffer6, sizeof(header.keyLength));
238 header.keyLength = be_int2(*(unsigned short *)(&buffer6));
240 //read number of flow reads
241 char buffer7 [sizeof(header.numFlowsPerRead)];
242 in.read(buffer7, sizeof(header.numFlowsPerRead));
243 header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7));
248 header.flogramFormatCode = (int)(buffer8[0]);
251 char tempBuffer [header.numFlowsPerRead];
252 in.read(tempBuffer, header.numFlowsPerRead);
253 header.flowChars = tempBuffer;
254 if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead); }
257 char tempBuffer2 [header.keyLength];
258 in.read(tempBuffer2, header.keyLength);
259 header.keySequence = tempBuffer2;
260 if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength); }
263 int spotInFile = in.tellg();
264 int spot = (spotInFile + 7)& ~7; // ~ inverts
268 m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
273 catch(exception& e) {
274 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
278 //**********************************************************************************************************************
279 int SffInfoCommand::readHeader(ifstream& in, Header& header){
285 char buffer [sizeof(header.headerLength)];
286 in.read(buffer, sizeof(header.headerLength));
287 header.headerLength = be_int2(*(unsigned short *)(&buffer));
290 char buffer2 [sizeof(header.nameLength)];
291 in.read(buffer2, sizeof(header.nameLength));
292 header.nameLength = be_int2(*(unsigned short *)(&buffer2));
295 char buffer3 [sizeof(header.numBases)];
296 in.read(buffer3, sizeof(header.numBases));
297 header.numBases = be_int4(*(unsigned int *)(&buffer3));
299 //read clip qual left
300 char buffer4 [sizeof(header.clipQualLeft)];
301 in.read(buffer4, sizeof(header.clipQualLeft));
302 header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4));
304 //read clip qual right
305 char buffer5 [sizeof(header.clipQualRight)];
306 in.read(buffer5, sizeof(header.clipQualRight));
307 header.clipQualRight = be_int2(*(unsigned short *)(&buffer5));
309 //read clipAdapterLeft
310 char buffer6 [sizeof(header.clipAdapterLeft)];
311 in.read(buffer6, sizeof(header.clipAdapterLeft));
312 header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
314 //read clipAdapterRight
315 char buffer7 [sizeof(header.clipAdapterRight)];
316 in.read(buffer7, sizeof(header.clipAdapterRight));
317 header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
320 char tempBuffer [header.nameLength];
321 in.read(tempBuffer, header.nameLength);
322 header.name = tempBuffer;
323 if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); }
326 int spotInFile = in.tellg();
327 int spot = (spotInFile + 7)& ~7;
331 m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
336 catch(exception& e) {
337 m->errorOut(e, "SffInfoCommand", "readHeader");
341 //**********************************************************************************************************************
342 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
348 read.flowgram.resize(numFlowReads);
349 for (int i = 0; i < numFlowReads; i++) {
350 char buffer [sizeof(unsigned short)];
351 in.read(buffer, (sizeof(unsigned short)));
352 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
356 read.flowIndex.resize(numBases);
357 for (int i = 0; i < numBases; i++) {
360 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
364 char tempBuffer[numBases];
365 in.read(tempBuffer, numBases);
366 read.bases = tempBuffer;
367 if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases); }
370 read.qualScores.resize(numBases);
371 for (int i = 0; i < numBases; i++) {
374 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
378 int spotInFile = in.tellg();
379 int spot = (spotInFile + 7)& ~7;
383 m->mothurOut("Error reading."); m->mothurOutEndLine();
388 catch(exception& e) {
389 m->errorOut(e, "SffInfoCommand", "readSeqData");
393 //**********************************************************************************************************************
394 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
397 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
398 out << "Version: " << header.version << endl;
399 out << "Index Offset: " << header.indexOffset << endl;
400 out << "Index Length: " << header.indexLength << endl;
401 out << "Number of Reads: " << header.numReads << endl;
402 out << "Header Length: " << header.headerLength << endl;
403 out << "Key Length: " << header.keyLength << endl;
404 out << "Number of Flows: " << header.numFlowsPerRead << endl;
405 out << "Format Code: " << header.flogramFormatCode << endl;
406 out << "Flow Chars: " << header.flowChars << endl;
407 out << "Key Sequence: " << header.keySequence << endl << endl;
411 catch(exception& e) {
412 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
416 //**********************************************************************************************************************
417 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
419 out << ">" << header.name << endl;
420 out << "Run Prefix: " << endl;
421 out << "Region #: " << endl;
422 out << "XY Location: " << endl << endl;
424 out << "Run Name: " << endl;
425 out << "Analysis Name: " << endl;
426 out << "Full Path: " << endl << endl;
428 out << "Read Header Len: " << header.headerLength << endl;
429 out << "Name Length: " << header.nameLength << endl;
430 out << "# of Bases: " << header.numBases << endl;
431 out << "Clip Qual Left: " << header.clipQualLeft << endl;
432 out << "Clip Qual Right: " << header.clipQualRight << endl;
433 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
434 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
438 catch(exception& e) {
439 m->errorOut(e, "SffInfoCommand", "printHeader");
444 //**********************************************************************************************************************
445 int SffInfoCommand::printSeqData(ofstream& out, seqRead& read) {
449 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; }
451 out << endl << "Flow Indexes: ";
453 for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; }
455 out << endl << "Bases: " << read.bases << endl << "Quality Scores: ";
456 for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
461 catch(exception& e) {
462 m->errorOut(e, "SffInfoCommand", "printSeqData");
466 //**********************************************************************************************************************/