]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
started work on sffinfo command. fixed bug across all paralellized commands if the...
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14
15 SffInfoCommand::SffInfoCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"sff","outputdir","inputdir", "outputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         //check to make sure all parameters are valid for command
32                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
33                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
34                         }
35                         
36                         //if the user changes the output directory command factory will send this info to us in the output parameter 
37                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
41
42                         sffFilename = validParameter.validFile(parameters, "sff", false);
43                         if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
44                         else { 
45                                 splitAtDash(sffFilename, filenames);
46                                 
47                                 //go through files and make sure they are good, if not, then disregard them
48                                 for (int i = 0; i < filenames.size(); i++) {
49                                         if (inputDir != "") {
50                                                 string path = hasPath(filenames[i]);
51                                                 //if the user has not given a path then, add inputdir. else leave path alone.
52                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
53                                         }
54         
55                                         ifstream in;
56                                         int ableToOpen = openInputFile(filenames[i], in);
57                                         in.close();
58                                         
59                                         if (ableToOpen == 1) { 
60                                                 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); 
61                                                 //erase from file list
62                                                 filenames.erase(filenames.begin()+i);
63                                                 i--;
64                                         }
65                                 }
66                                 
67                                 //make sure there is at least one valid file left
68                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
69                         }
70                 }
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 void SffInfoCommand::help(){
80         try {
81                 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
82                 
83                 m->mothurOut("Example sffinfo(sff=...).\n");
84                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "SffInfoCommand", "help");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92
93 SffInfoCommand::~SffInfoCommand(){}
94
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
97         try {
98                 
99                 if (abort == true) { return 0; }
100                 
101                 for (int s = 0; s < filenames.size(); s++) {
102                         
103                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
104                         
105                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
106                         
107                         if (outputDir == "") {  outputDir += hasPath(filenames[s]); }
108                         string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
109                                                 
110                         extractSffInfo(filenames[s], outputFileName);
111                         
112                         outputNames.push_back(outputFileName);
113
114                         m->mothurOut("Done."); m->mothurOutEndLine();
115                 }
116                 
117                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
118                 
119                 //report output filenames
120                 m->mothurOutEndLine();
121                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
122                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
123                 m->mothurOutEndLine();
124
125                 return 0;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "SffInfoCommand", "execute");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 int SffInfoCommand::extractSffInfo(string input, string output){
134         try {
135                 ofstream out;
136                 openOutputFile(output, out);
137                 
138                 ifstream in;
139                 in.open(input.c_str(), ios::binary);
140                 
141                 CommonHeader* header = readCommonHeader(in);
142                 
143                 //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
144 //cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;         
145                 cout << in.tellg() << endl;
146                 //read through the sff file
147                 while (!in.eof()) {
148                 
149                         //read header
150                         Header* readheader = readHeader(in);
151                         
152                         //read data
153                         seqRead* read = readSeqData(in, header->numFlowsPerRead, readheader->numBases);
154                         
155                         cout << in.tellg() << endl;
156                         
157                         //print common header
158                         printCommonHeader(out, header, true);
159                         
160                         //print header
161                         printHeader(out, readheader, true);
162                         
163                         //print data
164                         printSeqData(out, read, true);
165         
166                 }
167                 
168                 
169                 in.close();
170                 out.close();
171                 
172                 return 0;
173         }
174         catch(exception& e) {
175                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
176                 exit(1);
177         }
178 }
179 //**********************************************************************************************************************
180 CommonHeader* SffInfoCommand::readCommonHeader(ifstream& in){
181         try {
182                 CommonHeader* header = new CommonHeader();
183         
184                 if (!in.eof()) {
185                         string tempBuf = "";
186                         
187                         //read magic number
188                         char* buffer = new char(sizeof(header->magicNumber));
189                         in.read(buffer, sizeof(header->magicNumber));
190                         header->magicNumber = be_int4(*(uint32_t *)(buffer));
191                         delete[] buffer;
192                         
193                         //read version
194                         header->version = new char(4);
195                         in.read(header->version, 4);
196                         tempBuf = buffer;
197                         if (tempBuf.length() > 4) { tempBuf = tempBuf.substr(0, 4); strcpy(header->version, tempBuf.c_str());  }
198                         
199                         //read offset
200                         buffer = new char(sizeof(header->indexOffset));
201                         in.read(buffer, sizeof(header->indexOffset));
202                         header->indexOffset =  be_int8(*(uint64_t *)(buffer));
203                         delete[] buffer;
204                         
205                         //read index length
206                         buffer = new char(sizeof(header->indexLength));
207                         in.read(buffer, sizeof(header->indexLength));
208                         header->indexLength =  be_int4(*(uint32_t *)(buffer));
209                         delete[] buffer;
210                         
211                         //read num reads
212                         buffer = new char(sizeof(header->numReads));
213                         in.read(buffer, sizeof(header->numReads));
214                         header->numReads =  be_int4(*(uint32_t *)(buffer));
215                         delete[] buffer;        
216                         
217                         //read header length
218                         buffer = new char(sizeof(header->headerLength));
219                         in.read(buffer, sizeof(header->headerLength));
220                         header->headerLength =  be_int2(*(uint16_t *)(buffer));
221                         delete[] buffer;
222                         
223                         //read key length
224                         buffer = new char(sizeof(header->keyLength));
225                         in.read(buffer, sizeof(header->keyLength));
226                         header->keyLength = be_int2(*(uint16_t *)(buffer));
227                         delete[] buffer;
228
229                         //read number of flow reads
230                         buffer = new char(sizeof(header->numFlowsPerRead));
231                         in.read(buffer, sizeof(header->numFlowsPerRead));
232                         header->numFlowsPerRead =  be_int2(*(uint16_t *)(buffer));
233                         delete[] buffer;
234                         
235                         //read format code
236                         buffer = new char(sizeof(header->flogramFormatCode));
237                         in.read(buffer, sizeof(header->flogramFormatCode));
238                         header->flogramFormatCode = be_int1(*(uint8_t *)(buffer));
239                         delete[] buffer;
240                         
241                         //read flow chars
242                         header->flowChars = new char(header->numFlowsPerRead);
243                         in.read(header->flowChars, header->numFlowsPerRead);
244                         tempBuf = buffer;
245                         if (tempBuf.length() > header->numFlowsPerRead) { tempBuf = tempBuf.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf.c_str());  }
246                         
247                         //read key
248                         header->keySequence = new char(header->keyLength);
249                         in.read(header->keySequence, header->keyLength);
250                         tempBuf = header->keySequence;
251                         if (tempBuf.length() > header->keyLength) { tempBuf = tempBuf.substr(0, header->keyLength); strcpy(header->keySequence, tempBuf.c_str());  }
252                         
253                 }else{
254                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
255                 }
256
257                 return header;
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
261                 exit(1);
262         }
263 }
264 //**********************************************************************************************************************
265 Header* SffInfoCommand::readHeader(ifstream& in){
266         try {
267                 Header* header = new Header();
268         
269                 if (!in.eof()) {
270                         string tempBuf = "";
271                         
272                         //read header length
273                         char* buffer = new char(sizeof(header->headerLength));
274                         in.read(buffer, sizeof(header->headerLength));
275                         header->headerLength = be_int2(*(unsigned short *)(buffer));
276                         delete[] buffer;
277                                                 
278                         //read name length
279                         buffer = new char(sizeof(header->nameLength));
280                         in.read(buffer, sizeof(header->nameLength));
281                         header->nameLength = be_int2(*(unsigned short *)(buffer));
282                         delete[] buffer;
283
284                         //read num bases
285                         buffer = new char(sizeof(header->numBases));
286                         in.read(buffer, sizeof(header->numBases));
287                         header->numBases =  be_int4(*(unsigned int *)(buffer));
288                         delete[] buffer;
289                         
290                         //read clip qual left
291                         buffer = new char(sizeof(header->clipQualLeft));
292                         in.read(buffer, sizeof(header->clipQualLeft));
293                         header->clipQualLeft =  be_int2(*(unsigned short *)(buffer));
294                         delete[] buffer;
295                         
296                         //read clip qual right
297                         buffer = new char(sizeof(header->clipQualRight));
298                         in.read(buffer, sizeof(header->clipQualRight));
299                         header->clipQualRight =  be_int2(*(unsigned short *)(buffer));
300                         delete[] buffer;
301                         
302                         //read clipAdapterLeft
303                         buffer = new char(sizeof(header->clipAdapterLeft));
304                         in.read(buffer, sizeof(header->clipAdapterLeft));
305                         header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
306                         delete[] buffer;
307
308                         //read clipAdapterRight
309                         buffer = new char(sizeof(header->clipAdapterRight));
310                         in.read(buffer, sizeof(header->clipAdapterRight));
311                         header->clipAdapterRight =  be_int2(*(unsigned short *)(buffer));
312                         delete[] buffer;
313                         
314                         //read name
315                         header->name = new char(header->nameLength);
316                         in.read(header->name, header->nameLength);
317                         tempBuf = header->name;
318                         if (tempBuf.length() > header->nameLength) { tempBuf = tempBuf.substr(0, header->nameLength); strcpy(header->name, tempBuf.c_str());  }
319         
320                 }else{
321                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
322                 }
323
324                 return header;
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "SffInfoCommand", "readHeader");
328                 exit(1);
329         }
330 }
331 //**********************************************************************************************************************
332 seqRead* SffInfoCommand::readSeqData(ifstream& in, int numFlowReads, int numBases){
333         try {
334                 seqRead* read = new seqRead();
335         
336                 if (!in.eof()) {
337                                                 
338                         string tempBuf = "";
339                         char* buffer;
340                         
341                         //read flowgram
342                         read->flowgram.resize(numFlowReads);
343                         for (int i = 0; i < numFlowReads; i++) {  
344                                 buffer = new char((sizeof(unsigned short)));
345                                 in.read(buffer, (sizeof(unsigned short)));
346                                 read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
347                                 delete[] buffer;
348                         }
349                         
350                         //read flowgram
351                         read->flowIndex.resize(numBases);
352                         for (int i = 0; i < numBases; i++) {  
353                                 buffer = new char(1);
354                                 in.read(buffer, 1);
355                                 read->flowgram[i] = be_int1(*(unsigned int *)(buffer));
356                                 delete[] buffer;
357                         }
358                         
359                         //read bases
360                         read->bases = new char(numBases);
361                         in.read(read->bases, numBases);
362                         tempBuf = buffer;
363                         if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str());  }
364                         
365
366                         //read flowgram
367                         read->qualScores.resize(numBases);
368                         for (int i = 0; i < numBases; i++) {  
369                                 buffer = new char(1);
370                                 in.read(buffer, 1);
371                                 read->qualScores[i] = be_int1(*(unsigned int *)(buffer));
372                                 delete[] buffer;
373                         }
374                         
375                         /* Pad to 8 chars */
376                         int spotInFile = in.tellg();
377         cout << spotInFile << endl;
378                         int spot = floor((spotInFile + 7) /(float) 8) * 8;
379         cout << spot << endl;
380                         in.seekg(spot);
381                         
382                 }else{
383                         m->mothurOut("Error reading."); m->mothurOutEndLine();
384                 }
385
386                 return read;
387         }
388         catch(exception& e) {
389                 m->errorOut(e, "SffInfoCommand", "readSeqData");
390                 exit(1);
391         }
392 }
393 //**********************************************************************************************************************
394 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
395         try {
396         
397                 string output = "Common Header:\nMagic Number: ";
398                 output += toString(header->magicNumber) + '\n';
399                 output += "Version: " + toString(header->version) + '\n';
400                 output += "Index Offset: " + toString(header->indexOffset) + '\n';
401                 output += "Index Length: " + toString(header->indexLength) + '\n';      
402                 output += "Number of Reads: " + toString(header->numReads) + '\n';
403                 output += "Header Length: " + toString(header->headerLength) + '\n';
404                 output += "Key Length: " + toString(header->keyLength) + '\n';
405                 output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
406                 output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
407                 output += "Flow Chars: " + toString(header->flowChars) + '\n';
408                 output += "Key Sequence: " + toString(header->keySequence) + '\n';
409                 
410                 out << output << endl;
411                 
412                 if (debug) { cout << output << endl; }
413                         
414                 return 0;
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
418                 exit(1);
419         }
420 }
421 //**********************************************************************************************************************
422 int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
423         try {
424                 string name = header->name;
425                 string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
426                 output += "Name Length: " + toString(header->nameLength) + '\n';
427                 output += "Number of Bases: " + toString(header->numBases) + '\n';
428                 output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
429                 output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
430                 output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
431                 output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
432                 
433                 out << output << endl;
434                 
435                 if (debug) { cout << output << endl; }
436
437                 return 0;
438         }
439         catch(exception& e) {
440                 m->errorOut(e, "SffInfoCommand", "printHeader");
441                 exit(1);
442         }
443 }
444
445 //**********************************************************************************************************************
446 int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
447         try {
448
449                 string output = "FlowGram: ";
450                 for (int i = 0; i < read->flowgram.size(); i++) {  output += toString(read->flowgram[i]) +'\t';  }
451                 output += "\nFlow Indexes: ";
452                 for (int i = 0; i < read->flowIndex.size(); i++) {  output += toString(read->flowIndex[i]) +'\t';  }
453                 string bases = read->bases;
454                 output += "\nBases: " + bases + '\n';
455                 for (int i = 0; i < read->qualScores.size(); i++) {  output += toString(read->qualScores[i]) +'\t';  }
456                 output += '\n';
457                 
458                 out << output << endl;
459                 
460                 if (debug) { cout << output << endl; }
461                 
462                 return 0;
463         }
464         catch(exception& e) {
465                 m->errorOut(e, "SffInfoCommand", "printSeqData");
466                 exit(1);
467         }
468 }
469 //**********************************************************************************************************************/