]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
working on sffinfo command.
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14
15 SffInfoCommand::SffInfoCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"sff","outputdir","inputdir", "outputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         //check to make sure all parameters are valid for command
32                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
33                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
34                         }
35                         
36                         //if the user changes the output directory command factory will send this info to us in the output parameter 
37                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
41
42                         sffFilename = validParameter.validFile(parameters, "sff", false);
43                         if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
44                         else { 
45                                 splitAtDash(sffFilename, filenames);
46                                 
47                                 //go through files and make sure they are good, if not, then disregard them
48                                 for (int i = 0; i < filenames.size(); i++) {
49                                         if (inputDir != "") {
50                                                 string path = hasPath(filenames[i]);
51                                                 //if the user has not given a path then, add inputdir. else leave path alone.
52                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
53                                         }
54         
55                                         ifstream in;
56                                         int ableToOpen = openInputFile(filenames[i], in);
57                                         in.close();
58                                         
59                                         if (ableToOpen == 1) { 
60                                                 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); 
61                                                 //erase from file list
62                                                 filenames.erase(filenames.begin()+i);
63                                                 i--;
64                                         }
65                                 }
66                                 
67                                 //make sure there is at least one valid file left
68                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
69                         }
70                 }
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 void SffInfoCommand::help(){
80         try {
81                 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
82                 
83                 m->mothurOut("Example sffinfo(sff=...).\n");
84                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "SffInfoCommand", "help");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92
93 SffInfoCommand::~SffInfoCommand(){}
94
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
97         try {
98                 
99                 if (abort == true) { return 0; }
100                 
101                 for (int s = 0; s < filenames.size(); s++) {
102                         
103                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
104                         
105                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
106                         
107                         if (outputDir == "") {  outputDir += hasPath(filenames[s]); }
108                         string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
109                                                 
110                         extractSffInfo(filenames[s], outputFileName);
111                         
112                         outputNames.push_back(outputFileName);
113
114                         m->mothurOut("Done."); m->mothurOutEndLine();
115                 }
116                 
117                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
118                 
119                 //report output filenames
120                 m->mothurOutEndLine();
121                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
122                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
123                 m->mothurOutEndLine();
124
125                 return 0;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "SffInfoCommand", "execute");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 int SffInfoCommand::extractSffInfo(string input, string output){
134         try {
135                 ofstream out;
136                 openOutputFile(output, out);
137                 
138                 ifstream in;
139                 in.open(input.c_str(), ios::binary);
140                 
141                 CommonHeader* header = new CommonHeader(); 
142                 readCommonHeader(in, header);
143                 
144                 cout << strlen(header->flowChars) << endl;      
145                 //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
146 //cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;         
147                 cout << in.tellg() << endl;
148                 //read through the sff file
149                 while (!in.eof()) {
150                         //print common header
151                         printCommonHeader(out, header, true);
152
153                         //read header
154                         Header* readheader = new Header();
155                         readHeader(in, readheader);
156                         
157                         cout << in.tellg() << endl;
158                         
159                         //print header
160                         printHeader(out, readheader, true);
161                         
162                         //read data
163                         seqRead* read = new seqRead(); 
164                         readSeqData(in, read, header->numFlowsPerRead, readheader->numBases);
165                         
166                         cout << in.tellg() << endl;
167                         
168                         //print data
169                         printSeqData(out, read, true);
170         
171                 }
172                 
173                 
174                 in.close();
175                 out.close();
176                 
177                 return 0;
178         }
179         catch(exception& e) {
180                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
181                 exit(1);
182         }
183 }
184 //**********************************************************************************************************************
185 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader*& header){
186         try {
187
188                 if (!in.eof()) {
189                 
190                         //read magic number
191                         char* buffer = new char(sizeof(header->magicNumber));
192                         in.read(buffer, sizeof(header->magicNumber));
193                         header->magicNumber = be_int4(*(unsigned int *)(buffer));
194                         delete[] buffer;
195         //cout << "here " << header->magicNumber << '\t' << in.tellg() << endl;                 
196                         //read version
197                         header->version = new char(4);
198                         in.read(header->version, 4);
199                         string tempBuf0 = header->version;
200                         if (tempBuf0.length() > 4) { tempBuf0 = tempBuf0.substr(0, 4);  strcpy(header->version, tempBuf0.c_str());  }
201                         //memcpy(header->version, buffer+4, 4);
202         //cout << "here " << header->version  << '\t' << in.tellg() << endl;    
203                         //read offset
204                         buffer = new char(sizeof(header->indexOffset));
205                         in.read(buffer, sizeof(header->indexOffset));
206                         header->indexOffset =  be_int8(*(unsigned long int *)(buffer));
207                         delete[] buffer;
208         //cout << "here " << header->indexOffset  << '\t' << in.tellg() << endl;                
209                         //read index length
210                         buffer = new char(sizeof(header->indexLength));
211                         in.read(buffer, sizeof(header->indexLength));
212                         header->indexLength =  be_int4(*(unsigned int *)(buffer));
213                         delete[] buffer;
214         //cout << "here " << header->indexLength << '\t' << in.tellg() << endl;                 
215                         //read num reads
216                         buffer = new char(sizeof(header->numReads));
217                         in.read(buffer, sizeof(header->numReads));
218                         header->numReads =  be_int4(*(unsigned int *)(buffer));
219                         delete[] buffer;        
220         //cout << "here " << header->numReads << '\t' << in.tellg() << endl;                    
221                         //read header length
222                         buffer = new char(sizeof(header->headerLength));
223                         in.read(buffer, sizeof(header->headerLength));
224                         header->headerLength =  be_int2(*(unsigned short *)(buffer));
225                         delete[] buffer;
226         //cout << "here " << header->headerLength << '\t' << in.tellg() << endl;                        
227                         //read key length
228                         buffer = new char(sizeof(header->keyLength));
229                         in.read(buffer, sizeof(header->keyLength));
230                         header->keyLength = be_int2(*(unsigned short *)(buffer));
231                         delete[] buffer;
232         
233 //cout << "here " << header->keyLength << '\t' << in.tellg() << endl;
234                         //read number of flow reads
235                         buffer = new char(sizeof(header->numFlowsPerRead));
236                         in.read(buffer, sizeof(header->numFlowsPerRead));
237                         header->numFlowsPerRead =  be_int2(*(unsigned short *)(buffer));
238                         delete[] buffer;
239                 //cout << "here " << header->numFlowsPerRead << '\t' << in.tellg() << endl;             
240                         //read format code
241                         buffer = new char(sizeof(header->flogramFormatCode));
242                         in.read(buffer, sizeof(header->flogramFormatCode));
243                         header->flogramFormatCode = be_int1(*(char *)(buffer));
244                         delete[] buffer;
245         //cout << "here " << header->flogramFormatCode << '\t' << in.tellg() << endl;   
246         
247                         //read flow chars
248                         //header->numFlowsPerRead = 800;
249                         header->flowChars = new char(header->numFlowsPerRead);
250                         buffer = new char(header->numFlowsPerRead);
251         //cout << "here" << endl;
252                         //in.read(header->flowChars, header->numFlowsPerRead); 
253                         in.read(buffer, header->numFlowsPerRead); 
254                         memcpy(header->flowChars, buffer, header->numFlowsPerRead);
255                         delete[] buffer;
256         //cout << "here" << endl;
257                         //string tempBuf1 = header->flowChars;
258         //cout << "here " << in.tellg() << endl;
259                         //if (tempBuf1.length() > header->numFlowsPerRead) { tempBuf1 = tempBuf1.substr(0, header->numFlowsPerRead);  strcpy(header->flowChars, tempBuf1.c_str()); }
260                         
261         //      cout << "here " << header->flowChars << '\t' << in.tellg() << endl;     
262                         //read key
263                         //header->keyLength = 4;
264         //char* myAlloc2 = new char(4); cout << "alloced" << endl;
265                         header->keySequence = new char(header->keyLength);
266                         //char* myAlloc = new char(4);
267         //      cout << "here " << endl;                
268                         in.read(header->keySequence, header->keyLength);
269                         string tempBuf2 = header->keySequence;
270                         if (tempBuf2.length() > header->keyLength) { tempBuf2 = tempBuf2.substr(0, header->keyLength);  strcpy(header->keySequence, tempBuf2.c_str()); }
271         //cout << "here " << header->keySequence << '\t' << in.tellg() << endl;
272         
273                         /* Pad to 8 chars */
274                         int spotInFile = in.tellg();
275         //cout << spotInFile << endl;
276                         int spot = floor((spotInFile + 7) /(float) 8) * 8;
277         //cout << spot << endl;
278                         in.seekg(spot);
279         
280         //exit(1);      
281
282                 }else{
283                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
284                 }
285
286                 return 0;
287         }
288         catch(exception& e) {
289                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
290                 exit(1);
291         }
292 }
293 //**********************************************************************************************************************
294 int SffInfoCommand::readHeader(ifstream& in, Header*& header){
295         try {
296         
297                 if (!in.eof()) {
298                         string tempBuf = "";
299                         
300                         //read header length
301                         char* buffer = new char(sizeof(header->headerLength));
302                         in.read(buffer, sizeof(header->headerLength));
303                         header->headerLength = be_int2(*(unsigned short *)(buffer));
304                         delete[] buffer;
305                                                 
306                         //read name length
307                         buffer = new char(sizeof(header->nameLength));
308                         in.read(buffer, sizeof(header->nameLength));
309                         header->nameLength = be_int2(*(unsigned short *)(buffer));
310                         delete[] buffer;
311
312                         //read num bases
313                         buffer = new char(sizeof(header->numBases));
314                         in.read(buffer, sizeof(header->numBases));
315                         header->numBases =  be_int4(*(unsigned int *)(buffer));
316                         delete[] buffer;
317                         
318                         //read clip qual left
319                         buffer = new char(sizeof(header->clipQualLeft));
320                         in.read(buffer, sizeof(header->clipQualLeft));
321                         header->clipQualLeft =  be_int2(*(unsigned short *)(buffer));
322                         delete[] buffer;
323                         
324                         //read clip qual right
325                         buffer = new char(sizeof(header->clipQualRight));
326                         in.read(buffer, sizeof(header->clipQualRight));
327                         header->clipQualRight =  be_int2(*(unsigned short *)(buffer));
328                         delete[] buffer;
329                         
330                         //read clipAdapterLeft
331                         buffer = new char(sizeof(header->clipAdapterLeft));
332                         in.read(buffer, sizeof(header->clipAdapterLeft));
333                         header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
334                         delete[] buffer;
335
336                         //read clipAdapterRight
337                         buffer = new char(sizeof(header->clipAdapterRight));
338                         in.read(buffer, sizeof(header->clipAdapterRight));
339                         header->clipAdapterRight =  be_int2(*(unsigned short *)(buffer));
340                         delete[] buffer;
341                 
342                         //read name
343                         header->name = new char(header->nameLength);
344                         //buffer = new char(header->nameLength);
345                         in.read(header->name, header->nameLength);
346                         //memcpy(header->name, buffer, header->nameLength);
347                         //delete[] buffer;
348
349                 }else{
350                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
351                 }
352
353                 return 0;
354         }
355         catch(exception& e) {
356                 m->errorOut(e, "SffInfoCommand", "readHeader");
357                 exit(1);
358         }
359 }
360 //**********************************************************************************************************************
361 int SffInfoCommand::readSeqData(ifstream& in, seqRead*& read, int numFlowReads, int numBases){
362         try {
363         
364                 if (!in.eof()) {
365                                                 
366                         string tempBuf = "";
367                         char* buffer;
368                         
369                         //read flowgram
370                         read->flowgram.resize(numFlowReads);
371                         for (int i = 0; i < numFlowReads; i++) {  
372                                 buffer = new char((sizeof(unsigned short)));
373                                 in.read(buffer, (sizeof(unsigned short)));
374                                 read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
375                                 delete[] buffer;
376                         }
377                         
378                         //read flowgram
379                         read->flowIndex.resize(numBases);
380                         for (int i = 0; i < numBases; i++) {  
381                                 buffer = new char(1);
382                                 in.read(buffer, 1);
383                                 read->flowgram[i] = be_int1(*(unsigned int *)(buffer));
384                                 delete[] buffer;
385                         }
386                         
387                         //read bases
388                         read->bases = new char(numBases);
389                         in.read(read->bases, numBases);
390                         tempBuf = buffer;
391                         if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str());  }
392                         
393
394                         //read flowgram
395                         read->qualScores.resize(numBases);
396                         for (int i = 0; i < numBases; i++) {  
397                                 buffer = new char(1);
398                                 in.read(buffer, 1);
399                                 read->qualScores[i] = be_int1(*(unsigned int *)(buffer));
400                                 delete[] buffer;
401                         }
402                         
403                         /* Pad to 8 chars */
404                         int spotInFile = in.tellg();
405         cout << spotInFile << endl;
406                         int spot = floor((spotInFile + 7) /(float) 8) * 8;
407         cout << spot << endl;
408                         in.seekg(spot);
409                         
410                 }else{
411                         m->mothurOut("Error reading."); m->mothurOutEndLine();
412                 }
413
414                 return 0;
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "SffInfoCommand", "readSeqData");
418                 exit(1);
419         }
420 }
421 //**********************************************************************************************************************
422 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
423         try {
424         
425                 string output = "Common Header:\nMagic Number: ";
426                 output += toString(header->magicNumber) + '\n';
427                 output += "Version: " + toString(header->version) + '\n';
428                 output += "Index Offset: " + toString(header->indexOffset) + '\n';
429                 output += "Index Length: " + toString(header->indexLength) + '\n';      
430                 output += "Number of Reads: " + toString(header->numReads) + '\n';
431                 output += "Header Length: " + toString(header->headerLength) + '\n';
432                 output += "Key Length: " + toString(header->keyLength) + '\n';
433                 output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
434                 output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
435                 output += "Flow Chars: " + toString(header->flowChars) + '\n';
436                 output += "Key Sequence: " + toString(header->keySequence) + '\n';
437                 
438                 out << output << endl;
439                 
440                 if (debug) { cout << output << endl; }
441                         
442                 return 0;
443         }
444         catch(exception& e) {
445                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
446                 exit(1);
447         }
448 }
449 //**********************************************************************************************************************
450 int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
451         try {
452                 string name = header->name;
453                 string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
454                 output += "Name Length: " + toString(header->nameLength) + '\n';
455                 output += "Number of Bases: " + toString(header->numBases) + '\n';
456                 output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
457                 output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
458                 output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
459                 output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
460                 
461                 out << output << endl;
462                 
463                 if (debug) { cout << output << endl; }
464
465                 return 0;
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "SffInfoCommand", "printHeader");
469                 exit(1);
470         }
471 }
472
473 //**********************************************************************************************************************
474 int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
475         try {
476
477                 string output = "FlowGram: ";
478                 for (int i = 0; i < read->flowgram.size(); i++) {  output += toString(read->flowgram[i]) +'\t';  }
479                 output += "\nFlow Indexes: ";
480                 for (int i = 0; i < read->flowIndex.size(); i++) {  output += toString(read->flowIndex[i]) +'\t';  }
481                 string bases = read->bases;
482                 output += "\nBases: " + bases + '\n';
483                 for (int i = 0; i < read->qualScores.size(); i++) {  output += toString(read->qualScores[i]) +'\t';  }
484                 output += '\n';
485                 
486                 out << output << endl;
487                 
488                 if (debug) { cout << output << endl; }
489                 
490                 return 0;
491         }
492         catch(exception& e) {
493                 m->errorOut(e, "SffInfoCommand", "printSeqData");
494                 exit(1);
495         }
496 }
497 //**********************************************************************************************************************/