5 * Created by westcott on 7/7/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
13 //**********************************************************************************************************************
15 SffInfoCommand::SffInfoCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"sff","outputdir","inputdir", "outputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
31 //check to make sure all parameters are valid for command
32 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
33 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
36 //if the user changes the output directory command factory will send this info to us in the output parameter
37 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
42 sffFilename = validParameter.validFile(parameters, "sff", false);
43 if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; }
45 splitAtDash(sffFilename, filenames);
47 //go through files and make sure they are good, if not, then disregard them
48 for (int i = 0; i < filenames.size(); i++) {
50 string path = hasPath(filenames[i]);
51 //if the user has not given a path then, add inputdir. else leave path alone.
52 if (path == "") { filenames[i] = inputDir + filenames[i]; }
56 int ableToOpen = openInputFile(filenames[i], in);
59 if (ableToOpen == 1) {
60 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine();
61 //erase from file list
62 filenames.erase(filenames.begin()+i);
67 //make sure there is at least one valid file left
68 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
73 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
77 //**********************************************************************************************************************
79 void SffInfoCommand::help(){
81 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
83 m->mothurOut("Example sffinfo(sff=...).\n");
84 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
87 m->errorOut(e, "SffInfoCommand", "help");
91 //**********************************************************************************************************************
93 SffInfoCommand::~SffInfoCommand(){}
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
99 if (abort == true) { return 0; }
101 for (int s = 0; s < filenames.size(); s++) {
103 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
105 int start = time(NULL);
107 m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
109 if (outputDir == "") { outputDir += hasPath(filenames[s]); }
110 string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
112 int numReads = extractSffInfo(filenames[s], outputFileName);
114 outputNames.push_back(outputFileName);
116 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
119 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
121 //report output filenames
122 m->mothurOutEndLine();
123 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
124 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
125 m->mothurOutEndLine();
129 catch(exception& e) {
130 m->errorOut(e, "SffInfoCommand", "execute");
134 //**********************************************************************************************************************
135 int SffInfoCommand::extractSffInfo(string input, string output){
139 openOutputFile(output, out);
141 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
144 in.open(input.c_str(), ios::binary);
147 readCommonHeader(in, header);
151 //check magic number and version
152 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
153 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
155 //read through the sff file
157 //print common header
158 printCommonHeader(out, header);
162 readHeader(in, readheader);
165 printHeader(out, readheader);
169 readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
172 printSeqData(out, read);
177 if((count+1) % 500 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
179 if (m->control_pressed) { count = 0; break; }
181 if (count >= header.numReads) { break; }
185 if (!m->control_pressed) { if((count) % 500 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
192 catch(exception& e) {
193 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
197 //**********************************************************************************************************************
198 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
204 char buffer[sizeof(header.magicNumber)];
205 in.read(buffer, sizeof(header.magicNumber));
206 header.magicNumber = be_int4(*(unsigned int *)(&buffer));
212 for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); }
215 char buffer2 [sizeof(header.indexOffset)];
216 in.read(buffer2, sizeof(header.indexOffset));
217 header.indexOffset = be_int8(*(unsigned long int *)(&buffer2));
220 char buffer3 [sizeof(header.indexLength)];
221 in.read(buffer3, sizeof(header.indexLength));
222 header.indexLength = be_int4(*(unsigned int *)(&buffer3));
225 char buffer4 [sizeof(header.numReads)];
226 in.read(buffer4, sizeof(header.numReads));
227 header.numReads = be_int4(*(unsigned int *)(&buffer4));
230 char buffer5 [sizeof(header.headerLength)];
231 in.read(buffer5, sizeof(header.headerLength));
232 header.headerLength = be_int2(*(unsigned short *)(&buffer5));
235 char buffer6 [sizeof(header.keyLength)];
236 in.read(buffer6, sizeof(header.keyLength));
237 header.keyLength = be_int2(*(unsigned short *)(&buffer6));
239 //read number of flow reads
240 char buffer7 [sizeof(header.numFlowsPerRead)];
241 in.read(buffer7, sizeof(header.numFlowsPerRead));
242 header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7));
247 header.flogramFormatCode = (int)(buffer8[0]);
250 char tempBuffer [header.numFlowsPerRead];
251 in.read(tempBuffer, header.numFlowsPerRead);
252 header.flowChars = tempBuffer;
253 if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead); }
256 char myAlloc [header.keyLength];
257 in.read(myAlloc, header.keyLength);
258 header.keySequence = myAlloc;
259 if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength); }
262 int spotInFile = in.tellg();
263 int spot = (spotInFile + 7)& ~7;
267 m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
272 catch(exception& e) {
273 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
277 //**********************************************************************************************************************
278 int SffInfoCommand::readHeader(ifstream& in, Header& header){
284 char buffer [sizeof(header.headerLength)];
285 in.read(buffer, sizeof(header.headerLength));
286 header.headerLength = be_int2(*(unsigned short *)(&buffer));
289 char buffer2 [sizeof(header.nameLength)];
290 in.read(buffer2, sizeof(header.nameLength));
291 header.nameLength = be_int2(*(unsigned short *)(&buffer2));
294 char buffer3 [sizeof(header.numBases)];
295 in.read(buffer3, sizeof(header.numBases));
296 header.numBases = be_int4(*(unsigned int *)(&buffer3));
298 //read clip qual left
299 char buffer4 [sizeof(header.clipQualLeft)];
300 in.read(buffer4, sizeof(header.clipQualLeft));
301 header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4));
303 //read clip qual right
304 char buffer5 [sizeof(header.clipQualRight)];
305 in.read(buffer5, sizeof(header.clipQualRight));
306 header.clipQualRight = be_int2(*(unsigned short *)(&buffer5));
308 //read clipAdapterLeft
309 char buffer6 [sizeof(header.clipAdapterLeft)];
310 in.read(buffer6, sizeof(header.clipAdapterLeft));
311 header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
313 //read clipAdapterRight
314 char buffer7 [sizeof(header.clipAdapterRight)];
315 in.read(buffer7, sizeof(header.clipAdapterRight));
316 header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
319 char tempBuffer [header.nameLength];
320 in.read(tempBuffer, header.nameLength);
321 header.name = tempBuffer;
322 if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); }
325 int spotInFile = in.tellg();
326 int spot = (spotInFile + 7)& ~7;
330 m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
335 catch(exception& e) {
336 m->errorOut(e, "SffInfoCommand", "readHeader");
340 //**********************************************************************************************************************
341 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
347 read.flowgram.resize(numFlowReads);
348 for (int i = 0; i < numFlowReads; i++) {
349 char buffer [sizeof(unsigned short)];
350 in.read(buffer, (sizeof(unsigned short)));
351 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
355 read.flowIndex.resize(numBases);
356 for (int i = 0; i < numBases; i++) {
359 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
363 char tempBuffer[numBases];
364 in.read(tempBuffer, numBases);
365 read.bases = tempBuffer;
366 if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases); }
369 read.qualScores.resize(numBases);
370 for (int i = 0; i < numBases; i++) {
373 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
377 int spotInFile = in.tellg();
378 int spot = (spotInFile + 7)& ~7;
382 m->mothurOut("Error reading."); m->mothurOutEndLine();
387 catch(exception& e) {
388 m->errorOut(e, "SffInfoCommand", "readSeqData");
392 //**********************************************************************************************************************
393 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
396 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
397 out << "Version: " << header.version << endl;
398 out << "Index Offset: " << header.indexOffset << endl;
399 out << "Index Length: " << header.indexLength << endl;
400 out << "Number of Reads: " << header.numReads << endl;
401 out << "Header Length: " << header.headerLength << endl;
402 out << "Key Length: " << header.keyLength << endl;
403 out << "Number of Flows: " << header.numFlowsPerRead << endl;
404 out << "Format Code: " << header.flogramFormatCode << endl;
405 out << "Flow Chars: " << header.flowChars << endl;
406 out << "Key Sequence: " << header.keySequence << endl << endl;
410 catch(exception& e) {
411 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
415 //**********************************************************************************************************************
416 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
418 out << ">" << header.name << endl;
419 out << "Read Header Length: " << header.headerLength << endl;
420 out << "Name Length: " << header.nameLength << endl;
421 out << "Number of Bases: " << header.numBases << endl;
422 out << "Clip Qual Left: " << header.clipQualLeft << endl;
423 out << "Clip Qual Right: " << header.clipQualRight << endl;
424 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
425 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
429 catch(exception& e) {
430 m->errorOut(e, "SffInfoCommand", "printHeader");
435 //**********************************************************************************************************************
436 int SffInfoCommand::printSeqData(ofstream& out, seqRead& read) {
440 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; }
442 out << endl << "Flow Indexes: ";
444 for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; }
446 out << endl << "Bases: " << read.bases << endl << "Quality Scores: ";
447 for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
452 catch(exception& e) {
453 m->errorOut(e, "SffInfoCommand", "printSeqData");
457 //**********************************************************************************************************************/