]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
working on sffinfo command.
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14
15 SffInfoCommand::SffInfoCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"sff","outputdir","inputdir", "outputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         //check to make sure all parameters are valid for command
32                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
33                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
34                         }
35                         
36                         //if the user changes the output directory command factory will send this info to us in the output parameter 
37                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
41
42                         sffFilename = validParameter.validFile(parameters, "sff", false);
43                         if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
44                         else { 
45                                 splitAtDash(sffFilename, filenames);
46                                 
47                                 //go through files and make sure they are good, if not, then disregard them
48                                 for (int i = 0; i < filenames.size(); i++) {
49                                         if (inputDir != "") {
50                                                 string path = hasPath(filenames[i]);
51                                                 //if the user has not given a path then, add inputdir. else leave path alone.
52                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
53                                         }
54         
55                                         ifstream in;
56                                         int ableToOpen = openInputFile(filenames[i], in);
57                                         in.close();
58                                         
59                                         if (ableToOpen == 1) { 
60                                                 m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); 
61                                                 //erase from file list
62                                                 filenames.erase(filenames.begin()+i);
63                                                 i--;
64                                         }
65                                 }
66                                 
67                                 //make sure there is at least one valid file left
68                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
69                         }
70                 }
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
79 void SffInfoCommand::help(){
80         try {
81                 m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
82                 
83                 m->mothurOut("Example sffinfo(sff=...).\n");
84                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "SffInfoCommand", "help");
88                 exit(1);
89         }
90 }
91 //**********************************************************************************************************************
92
93 SffInfoCommand::~SffInfoCommand(){}
94
95 //**********************************************************************************************************************
96 int SffInfoCommand::execute(){
97         try {
98                 
99                 if (abort == true) { return 0; }
100                 
101                 for (int s = 0; s < filenames.size(); s++) {
102                         
103                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
104                         
105                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
106                         
107                         if (outputDir == "") {  outputDir += hasPath(filenames[s]); }
108                         string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
109                                                 
110                         extractSffInfo(filenames[s], outputFileName);
111                         
112                         outputNames.push_back(outputFileName);
113
114                         m->mothurOut("Done."); m->mothurOutEndLine();
115                 }
116                 
117                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
118                 
119                 //report output filenames
120                 m->mothurOutEndLine();
121                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
122                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
123                 m->mothurOutEndLine();
124
125                 return 0;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "SffInfoCommand", "execute");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 int SffInfoCommand::extractSffInfo(string input, string output){
134         try {
135                 ofstream out;
136                 openOutputFile(output, out);
137                 
138                 ifstream in;
139                 in.open(input.c_str(), ios::binary);
140                 
141                 CommonHeader* header = new CommonHeader(); 
142                 readCommonHeader(in, header);
143                 
144                 cout << strlen(header->flowChars) << endl;      
145                 //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
146 //cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;         
147                 cout << in.tellg() << endl;
148                 //read through the sff file
149                 while (!in.eof()) {
150                         //print common header
151                         printCommonHeader(out, header, true);
152
153                         //read header
154                         Header* readheader = new Header();
155                         readHeader(in, readheader);
156                         
157                         cout << in.tellg() << endl;
158                         
159                         //print header
160                         printHeader(out, readheader, true);
161                         
162                         //read data
163                         seqRead* read = new seqRead(); 
164                         readSeqData(in, read, header->numFlowsPerRead, readheader->numBases);
165                         
166                         cout << in.tellg() << endl;
167                         
168                         //print data
169                         printSeqData(out, read, true);
170                         
171                         delete readheader;
172                         delete read;
173                 }
174                 
175                 in.close();
176                 out.close();
177                 
178                 return 0;
179         }
180         catch(exception& e) {
181                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
182                 exit(1);
183         }
184 }
185 //**********************************************************************************************************************
186 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader*& header){
187         try {
188
189                 if (!in.eof()) {
190                 
191                         //read magic number
192                         char* buffer = new char(sizeof(header->magicNumber));
193                         in.read(buffer, sizeof(header->magicNumber));
194                         header->magicNumber = be_int4(*(unsigned int *)(buffer));
195                         delete[] buffer;
196         //cout << "here " << header->magicNumber << '\t' << in.tellg() << endl;                 
197                         //read version
198                         header->version = new char(4);
199                         in.read(header->version, 4);
200                         string tempBuf0 = header->version;  //this is in here because the read sometimes tacks on extra chars, not sure why?
201                         if (tempBuf0.length() > 4) { tempBuf0 = tempBuf0.substr(0, 4);  strcpy(header->version, tempBuf0.c_str());  }
202                         //memcpy(header->version, buffer+4, 4);
203         //cout << "here " << header->version  << '\t' << in.tellg() << endl;    
204                         //read offset
205                         buffer = new char(sizeof(header->indexOffset));
206                         in.read(buffer, sizeof(header->indexOffset));
207                         header->indexOffset =  be_int8(*(unsigned long int *)(buffer));
208                         delete[] buffer;
209         //cout << "here " << header->indexOffset  << '\t' << in.tellg() << endl;                
210                         //read index length
211                         buffer = new char(sizeof(header->indexLength));
212                         in.read(buffer, sizeof(header->indexLength));
213                         header->indexLength =  be_int4(*(unsigned int *)(buffer));
214                         delete[] buffer;
215         //cout << "here " << header->indexLength << '\t' << in.tellg() << endl;                 
216                         //read num reads
217                         buffer = new char(sizeof(header->numReads));
218                         in.read(buffer, sizeof(header->numReads));
219                         header->numReads =  be_int4(*(unsigned int *)(buffer));
220                         delete[] buffer;        
221         //cout << "here " << header->numReads << '\t' << in.tellg() << endl;                    
222                         //read header length
223                         buffer = new char(sizeof(header->headerLength));
224                         in.read(buffer, sizeof(header->headerLength));
225                         header->headerLength =  be_int2(*(unsigned short *)(buffer));
226                         delete[] buffer;
227         //cout << "here " << header->headerLength << '\t' << in.tellg() << endl;                        
228                         //read key length
229                         buffer = new char(sizeof(header->keyLength));
230                         in.read(buffer, sizeof(header->keyLength));
231                         header->keyLength = be_int2(*(unsigned short *)(buffer));
232                         delete[] buffer;
233         
234 //cout << "here " << header->keyLength << '\t' << in.tellg() << endl;
235                         //read number of flow reads
236                         buffer = new char(sizeof(header->numFlowsPerRead));
237                         in.read(buffer, sizeof(header->numFlowsPerRead));
238                         header->numFlowsPerRead =  be_int2(*(unsigned short *)(buffer));
239                         delete[] buffer;
240                 //cout << "here " << header->numFlowsPerRead << '\t' << in.tellg() << endl;             
241                         //read format code
242                         //buffer = new char(sizeof(header->flogramFormatCode));
243                         in.read(&header->flogramFormatCode, 1);
244                         header->flogramFormatCode = be_int1(*(char *)(&header->flogramFormatCode));
245                         //delete[] buffer;
246         //cout << "here " << header->flogramFormatCode << '\t' << in.tellg() << endl;   
247         
248                         //read flow chars
249                         //header->numFlowsPerRead = 800;
250                         header->flowChars = new char(header->numFlowsPerRead);
251                         buffer = new char(header->numFlowsPerRead);
252         //cout << "here" << endl;
253                         //in.read(header->flowChars, header->numFlowsPerRead); 
254                         //string tempBuf1 = header->flowChars + '\0';  //this is in here because the read sometimes tacks on extra chars, not sure why?
255                         //if (tempBuf0.length() > header->numFlowsPerRead) { tempBuf1 = tempBuf1.substr(0, header->numFlowsPerRead);  strcpy(header->flowChars, tempBuf1.c_str());  }
256                         in.read(buffer, header->numFlowsPerRead); 
257                         memcpy(header->flowChars, buffer, header->numFlowsPerRead);
258                         delete[] buffer;
259         //cout << "here" << endl;
260                         //string tempBuf1 = header->flowChars;
261         //cout << "here " << in.tellg() << endl;
262                         //if (tempBuf1.length() > header->numFlowsPerRead) { tempBuf1 = tempBuf1.substr(0, header->numFlowsPerRead);  strcpy(header->flowChars, tempBuf1.c_str()); }
263                         
264                 //cout << "here " << header->flowChars << '\t' << in.tellg() << endl;   
265                         //read key
266                         //header->keyLength = 4;
267         //char* myAlloc2 = new char(4); cout << "alloced" << endl;
268                         header->keySequence = new char(header->keyLength);
269                         //char* myAlloc = new char(4);
270         //      cout << "here " << endl;                
271                         in.read(header->keySequence, header->keyLength);
272                         string tempBuf2 = header->keySequence;
273                         if (tempBuf2.length() > header->keyLength) { tempBuf2 = tempBuf2.substr(0, header->keyLength);  strcpy(header->keySequence, tempBuf2.c_str()); }
274         //cout << "here " << header->keySequence << '\t' << in.tellg() << endl;
275         
276                         /* Pad to 8 chars */
277                         int spotInFile = in.tellg();
278         //cout << spotInFile << endl;
279                         int spot = floor((spotInFile + 7) /(float) 8) * 8;
280         //cout << spot << endl;
281                         in.seekg(spot);
282         
283         //exit(1);      
284
285                 }else{
286                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
287                 }
288
289                 return 0;
290         }
291         catch(exception& e) {
292                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
293                 exit(1);
294         }
295 }
296 //**********************************************************************************************************************
297 int SffInfoCommand::readHeader(ifstream& in, Header*& header){
298         try {
299         
300                 if (!in.eof()) {
301                         string tempBuf = "";
302                         
303                         //read header length
304                         char* buffer = new char(sizeof(header->headerLength));
305                         in.read(buffer, sizeof(header->headerLength));
306                         header->headerLength = be_int2(*(unsigned short *)(buffer));
307                         delete[] buffer;
308                                                 
309                         //read name length
310                         buffer = new char(sizeof(header->nameLength));
311                         in.read(buffer, sizeof(header->nameLength));
312                         header->nameLength = be_int2(*(unsigned short *)(buffer));
313                         delete[] buffer;
314
315                         //read num bases
316                         buffer = new char(sizeof(header->numBases));
317                         in.read(buffer, sizeof(header->numBases));
318                         header->numBases =  be_int4(*(unsigned int *)(buffer));
319                         delete[] buffer;
320                         
321                         //read clip qual left
322                         buffer = new char(sizeof(header->clipQualLeft));
323                         in.read(buffer, sizeof(header->clipQualLeft));
324                         header->clipQualLeft =  be_int2(*(unsigned short *)(buffer));
325                         delete[] buffer;
326                         
327                         //read clip qual right
328                         buffer = new char(sizeof(header->clipQualRight));
329                         in.read(buffer, sizeof(header->clipQualRight));
330                         header->clipQualRight =  be_int2(*(unsigned short *)(buffer));
331                         delete[] buffer;
332                         
333                         //read clipAdapterLeft
334                         buffer = new char(sizeof(header->clipAdapterLeft));
335                         in.read(buffer, sizeof(header->clipAdapterLeft));
336                         header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
337                         delete[] buffer;
338
339                         //read clipAdapterRight
340                         buffer = new char(sizeof(header->clipAdapterRight));
341                         in.read(buffer, sizeof(header->clipAdapterRight));
342                         header->clipAdapterRight =  be_int2(*(unsigned short *)(buffer));
343                         delete[] buffer;
344                 
345                         //read name
346                         header->name = new char(header->nameLength);
347                         //buffer = new char(header->nameLength);
348                         in.read(header->name, header->nameLength);
349                         //memcpy(header->name, buffer, header->nameLength);
350                         //delete[] buffer;
351
352                 }else{
353                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
354                 }
355
356                 return 0;
357         }
358         catch(exception& e) {
359                 m->errorOut(e, "SffInfoCommand", "readHeader");
360                 exit(1);
361         }
362 }
363 //**********************************************************************************************************************
364 int SffInfoCommand::readSeqData(ifstream& in, seqRead*& read, int numFlowReads, int numBases){
365         try {
366         
367                 if (!in.eof()) {
368                                                 
369                         string tempBuf = "";
370                         char* buffer;
371                         
372                         //read flowgram
373                         read->flowgram.resize(numFlowReads);
374                         for (int i = 0; i < numFlowReads; i++) {  
375                                 buffer = new char((sizeof(unsigned short)));
376                                 in.read(buffer, (sizeof(unsigned short)));
377                                 read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
378                                 delete[] buffer;
379                         }
380         
381                         //read flowIndex
382                         read->flowIndex.resize(numBases);
383                         for (int i = 0; i < numBases; i++) {  
384                                 buffer = new char(1);
385                                 in.read(buffer, 1);
386                                 read->flowgram[i] = be_int1(*(unsigned int *)(buffer));
387                                 delete[] buffer;
388                         }
389                 
390                         //read bases
391                         read->bases = new char(numBases);
392                         in.read(read->bases, numBases);
393                         tempBuf = buffer;
394                         if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str());  }
395
396                         //read flowgram
397                         read->qualScores.resize(numBases);
398                         for (int i = 0; i < numBases; i++) {  
399                                 char temp;
400                                 in.read(&temp, 1);
401                                 read->qualScores[i] = be_int1(*(unsigned int *)(&temp));
402                         }
403                 
404                         /* Pad to 8 chars */
405                         int spotInFile = in.tellg();
406         cout << spotInFile << endl;
407                         int spot = floor((spotInFile + 7) /(float) 8) * 8;
408         cout << spot << endl;
409                         in.seekg(spot);
410                         
411                 }else{
412                         m->mothurOut("Error reading."); m->mothurOutEndLine();
413                 }
414
415                 return 0;
416         }
417         catch(exception& e) {
418                 m->errorOut(e, "SffInfoCommand", "readSeqData");
419                 exit(1);
420         }
421 }
422 //**********************************************************************************************************************
423 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
424         try {
425         
426                 string output = "Common Header:\nMagic Number: ";
427                 output += toString(header->magicNumber) + '\n';
428                 output += "Version: " + toString(header->version) + '\n';
429                 output += "Index Offset: " + toString(header->indexOffset) + '\n';
430                 output += "Index Length: " + toString(header->indexLength) + '\n';      
431                 output += "Number of Reads: " + toString(header->numReads) + '\n';
432                 output += "Header Length: " + toString(header->headerLength) + '\n';
433                 output += "Key Length: " + toString(header->keyLength) + '\n';
434                 output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
435                 output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
436                 output += "Flow Chars: " + toString(header->flowChars) + '\n';
437                 output += "Key Sequence: " + toString(header->keySequence) + '\n';
438                 
439                 out << output << endl;
440                 
441                 if (debug) { cout << output << endl; }
442                         
443                 return 0;
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
447                 exit(1);
448         }
449 }
450 //**********************************************************************************************************************
451 int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
452         try {
453                 string name = header->name;
454                 string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
455                 output += "Name Length: " + toString(header->nameLength) + '\n';
456                 output += "Number of Bases: " + toString(header->numBases) + '\n';
457                 output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
458                 output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
459                 output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
460                 output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
461                 
462                 out << output << endl;
463                 
464                 if (debug) { cout << output << endl; }
465
466                 return 0;
467         }
468         catch(exception& e) {
469                 m->errorOut(e, "SffInfoCommand", "printHeader");
470                 exit(1);
471         }
472 }
473
474 //**********************************************************************************************************************
475 int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
476         try {
477
478                 string output = "FlowGram: ";
479                 for (int i = 0; i < read->flowgram.size(); i++) {  output += toString(read->flowgram[i]) +'\t';  }
480                 output += "\nFlow Indexes: ";
481                 for (int i = 0; i < read->flowIndex.size(); i++) {  output += toString(read->flowIndex[i]) +'\t';  }
482                 string bases = read->bases;
483                 output += "\nBases: " + bases + '\n';
484                 for (int i = 0; i < read->qualScores.size(); i++) {  output += toString(read->qualScores[i]) +'\t';  }
485                 output += '\n';
486                 
487                 out << output << endl;
488                 
489                 if (debug) { cout << output << endl; }
490                 
491                 return 0;
492         }
493         catch(exception& e) {
494                 m->errorOut(e, "SffInfoCommand", "printSeqData");
495                 exit(1);
496         }
497 }
498 //**********************************************************************************************************************/