]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
sffinfo command is working
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14
15 SffInfoCommand::SffInfoCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"sff","outputdir","inputdir", "outputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         //check to make sure all parameters are valid for command
32                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
33                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
34                         }
35                         
36                         //if the user changes the output directory command factory will send this info to us in the output parameter 
37                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
41
42                         sffFilename = validParameter.validFile(parameters, "sff", false);
43                         if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
44                         else { 
45                                 splitAtDash(sffFilename, filenames);
46                                 
47                                 //go through files and make sure they are good, if not, then disregard them
48                                 for (int i = 0; i < filenames.size(); i++) {
49                                         if (inputDir != "") {
50                                                 string path = hasPath(filenames[i]);
51                                                 //if the user has not given a path then, add inputdir. else leave path alone.
52                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
53                                         }
54         
55                                         ifstream in;
56                                         int ableToOpen = openInputFile(filenames[i], in);
57                                         in.close();
58                                         
59                                         if (ableToOpen == 1) { 
60                                                 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); 
61                                                 //erase from file list
62                                                 filenames.erase(filenames.begin()+i);
63                                                 i--;
64                                         }
65                                 }
66                                 
67                                 //make sure there is at least one valid file left
68                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
69                         }
70                 }
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 void SffInfoCommand::help(){
80         try {
81                 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
82                 
83                 m->mothurOut("Example sffinfo(sff=...).\n");
84                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "SffInfoCommand", "help");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92
93 SffInfoCommand::~SffInfoCommand(){}
94
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
97         try {
98                 
99                 if (abort == true) { return 0; }
100                 
101                 for (int s = 0; s < filenames.size(); s++) {
102                         
103                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
104                         
105                         int start = time(NULL);
106                         
107                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
108                         
109                         if (outputDir == "") {  outputDir += hasPath(filenames[s]); }
110                         string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
111                                                 
112                         int numReads = extractSffInfo(filenames[s], outputFileName);
113                         
114                         outputNames.push_back(outputFileName);
115
116                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
117                 }
118                 
119                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
120                 
121                 //report output filenames
122                 m->mothurOutEndLine();
123                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
124                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
125                 m->mothurOutEndLine();
126
127                 return 0;
128         }
129         catch(exception& e) {
130                 m->errorOut(e, "SffInfoCommand", "execute");
131                 exit(1);
132         }
133 }
134 //**********************************************************************************************************************
135 int SffInfoCommand::extractSffInfo(string input, string output){
136         try {
137                 
138                 ofstream out;
139                 openOutputFile(output, out);
140                 
141                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
142                 
143                 ifstream in;
144                 in.open(input.c_str(), ios::binary);
145                 
146                 CommonHeader header; 
147                 readCommonHeader(in, header);
148                 
149                 //print common header
150                 printCommonHeader(out, header);
151                 
152                 int count = 0;
153                 
154                 //check magic number and version
155                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
156                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
157                 
158                 //read through the sff file
159                 while (!in.eof()) {
160                         
161                         //read header
162                         Header readheader;
163                         readHeader(in, readheader);
164                 
165                         //print header
166                         printHeader(out, readheader);
167                         
168                         //read data
169                         seqRead read; 
170                         readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
171                 
172                         //print data
173                         printSeqData(out, read);
174                         
175                         count++;
176                         
177                         //report progress
178                         if((count+1) % 500 == 0){       m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
179                 
180                         if (m->control_pressed) { count = 0; break;   }
181                         
182                         if (count >= header.numReads) { break; }
183                 }
184                 
185                 //report progress
186                 if (!m->control_pressed) {   if((count) % 500 != 0){    m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
187                 
188                 in.close();
189                 out.close();
190                 
191                 return count;
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
195                 exit(1);
196         }
197 }
198 //**********************************************************************************************************************
199 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
200         try {
201
202                 if (!in.eof()) {
203                 
204                         //read magic number
205                         char buffer[sizeof(header.magicNumber)];
206                         in.read(buffer, sizeof(header.magicNumber));
207                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
208                         
209                         //read version
210                         char buffer9[4];
211                         in.read(buffer9, 4);
212                         header.version = "";
213                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i])); }
214                                 
215                         //read offset
216                         char buffer2 [sizeof(header.indexOffset)];
217                         in.read(buffer2, sizeof(header.indexOffset));
218                         header.indexOffset =  be_int8(*(unsigned long int *)(&buffer2));
219                         
220                         //read index length
221                         char buffer3 [sizeof(header.indexLength)];
222                         in.read(buffer3, sizeof(header.indexLength));
223                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
224                         
225                         //read num reads
226                         char buffer4 [sizeof(header.numReads)];
227                         in.read(buffer4, sizeof(header.numReads));
228                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
229                                 
230                         //read header length
231                         char buffer5 [sizeof(header.headerLength)];
232                         in.read(buffer5, sizeof(header.headerLength));
233                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
234                                         
235                         //read key length
236                         char buffer6 [sizeof(header.keyLength)];
237                         in.read(buffer6, sizeof(header.keyLength));
238                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
239                         
240                         //read number of flow reads
241                         char buffer7 [sizeof(header.numFlowsPerRead)];
242                         in.read(buffer7, sizeof(header.numFlowsPerRead));
243                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
244                                 
245                         //read format code
246                         char buffer8 [1];
247                         in.read(buffer8, 1);
248                         header.flogramFormatCode = (int)(buffer8[0]);
249                         
250                         //read flow chars
251                         char tempBuffer [header.numFlowsPerRead];
252                         in.read(tempBuffer, header.numFlowsPerRead); 
253                         header.flowChars = tempBuffer;
254                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
255                         
256                         //read key
257                         char tempBuffer2 [header.keyLength];
258                         in.read(tempBuffer2, header.keyLength);
259                         header.keySequence = tempBuffer2;
260                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
261                                 
262                         /* Pad to 8 chars */
263                         int spotInFile = in.tellg();
264                         int spot = (spotInFile + 7)& ~7;  // ~ inverts
265                         in.seekg(spot);
266                         
267                 }else{
268                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
269                 }
270
271                 return 0;
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279 int SffInfoCommand::readHeader(ifstream& in, Header& header){
280         try {
281         
282                 if (!in.eof()) {
283                         
284                         //read header length
285                         char buffer [sizeof(header.headerLength)];
286                         in.read(buffer, sizeof(header.headerLength));
287                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
288                                                 
289                         //read name length
290                         char buffer2 [sizeof(header.nameLength)];
291                         in.read(buffer2, sizeof(header.nameLength));
292                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
293
294                         //read num bases
295                         char buffer3 [sizeof(header.numBases)];
296                         in.read(buffer3, sizeof(header.numBases));
297                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
298                         
299                         //read clip qual left
300                         char buffer4 [sizeof(header.clipQualLeft)];
301                         in.read(buffer4, sizeof(header.clipQualLeft));
302                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
303                         
304                         //read clip qual right
305                         char buffer5 [sizeof(header.clipQualRight)];
306                         in.read(buffer5, sizeof(header.clipQualRight));
307                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
308                         
309                         //read clipAdapterLeft
310                         char buffer6 [sizeof(header.clipAdapterLeft)];
311                         in.read(buffer6, sizeof(header.clipAdapterLeft));
312                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
313
314                         //read clipAdapterRight
315                         char buffer7 [sizeof(header.clipAdapterRight)];
316                         in.read(buffer7, sizeof(header.clipAdapterRight));
317                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
318                 
319                         //read name
320                         char tempBuffer [header.nameLength];
321                         in.read(tempBuffer, header.nameLength);
322                         header.name = tempBuffer;
323                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
324                         
325                         /* Pad to 8 chars */
326                         int spotInFile = in.tellg();
327                         int spot = (spotInFile + 7)& ~7;
328                         in.seekg(spot);
329                         
330                 }else{
331                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
332                 }
333
334                 return 0;
335         }
336         catch(exception& e) {
337                 m->errorOut(e, "SffInfoCommand", "readHeader");
338                 exit(1);
339         }
340 }
341 //**********************************************************************************************************************
342 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
343         try {
344         
345                 if (!in.eof()) {
346         
347                         //read flowgram
348                         read.flowgram.resize(numFlowReads);
349                         for (int i = 0; i < numFlowReads; i++) {  
350                                 char buffer [sizeof(unsigned short)];
351                                 in.read(buffer, (sizeof(unsigned short)));
352                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
353                         }
354         
355                         //read flowIndex
356                         read.flowIndex.resize(numBases);
357                         for (int i = 0; i < numBases; i++) {  
358                                 char temp[1];
359                                 in.read(temp, 1);
360                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
361                         }
362                 
363                         //read bases
364                         char tempBuffer[numBases];
365                         in.read(tempBuffer, numBases);
366                         read.bases = tempBuffer;
367                         if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases);  }
368
369                         //read flowgram
370                         read.qualScores.resize(numBases);
371                         for (int i = 0; i < numBases; i++) {  
372                                 char temp[1];
373                                 in.read(temp, 1);
374                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
375                         }
376                 
377                         /* Pad to 8 chars */
378                         int spotInFile = in.tellg();
379                         int spot = (spotInFile + 7)& ~7;
380                         in.seekg(spot);
381                         
382                 }else{
383                         m->mothurOut("Error reading."); m->mothurOutEndLine();
384                 }
385
386                 return 0;
387         }
388         catch(exception& e) {
389                 m->errorOut(e, "SffInfoCommand", "readSeqData");
390                 exit(1);
391         }
392 }
393 //**********************************************************************************************************************
394 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
395         try {
396         
397                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
398                 out << "Version: " << header.version << endl;
399                 out << "Index Offset: " << header.indexOffset << endl;
400                 out << "Index Length: " << header.indexLength << endl;
401                 out << "Number of Reads: " << header.numReads << endl;
402                 out << "Header Length: " << header.headerLength << endl;
403                 out << "Key Length: " << header.keyLength << endl;
404                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
405                 out << "Format Code: " << header.flogramFormatCode << endl;
406                 out << "Flow Chars: " << header.flowChars << endl;
407                 out << "Key Sequence: " << header.keySequence << endl << endl;
408                         
409                 return 0;
410         }
411         catch(exception& e) {
412                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
413                 exit(1);
414         }
415 }
416 //**********************************************************************************************************************
417 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
418         try {
419                 out << ">" << header.name << endl;
420                 out << "Run Prefix: " << endl;
421                 out << "Region #:  " << endl;
422                 out << "XY Location: " << endl << endl;
423                 
424                 out << "Run Name:  " << endl;
425                 out << "Analysis Name:  " << endl;
426                 out << "Full Path: " << endl << endl;
427                 
428                 out << "Read Header Len: " << header.headerLength << endl;
429                 out << "Name Length: " << header.nameLength << endl;
430                 out << "# of Bases: " << header.numBases << endl;
431                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
432                 out << "Clip Qual Right: " << header.clipQualRight << endl;
433                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
434                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
435                 
436                 return 0;
437         }
438         catch(exception& e) {
439                 m->errorOut(e, "SffInfoCommand", "printHeader");
440                 exit(1);
441         }
442 }
443
444 //**********************************************************************************************************************
445 int SffInfoCommand::printSeqData(ofstream& out, seqRead& read) {
446         try {
447                 
448                 out << "FlowGram: ";
449                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
450                 
451                 out << endl <<  "Flow Indexes: ";
452                 int sum = 0;
453                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
454                 
455                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
456                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
457                 out << endl << endl;
458                 
459                 return 0;
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "SffInfoCommand", "printSeqData");
463                 exit(1);
464         }
465 }
466 //**********************************************************************************************************************/