]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14 vector<string> SffInfoCommand::getValidParameters(){    
15         try {
16                 string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "SffInfoCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 SffInfoCommand::SffInfoCommand(){       
27         try {
28                 abort = true; calledHelp = true; 
29                 vector<string> tempOutNames;
30                 outputTypes["fasta"] = tempOutNames;
31                 outputTypes["flow"] = tempOutNames;
32                 outputTypes["sfftxt"] = tempOutNames;
33                 outputTypes["qual"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SffInfoCommand::getRequiredParameters(){ 
42         try {
43                 string Array[] =  {"sff", "sfftxt", "or"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SffInfoCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SffInfoCommand::getRequiredFiles(){      
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SffInfoCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 SffInfoCommand::SffInfoCommand(string option)  {
66         try {
67                 abort = false; calledHelp = false;   
68                 hasAccnos = false;
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; calledHelp = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string, string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         //check to make sure all parameters are valid for command
83                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["fasta"] = tempOutNames;
90                         outputTypes["flow"] = tempOutNames;
91                         outputTypes["sfftxt"] = tempOutNames;
92                         outputTypes["qual"] = tempOutNames;
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
99
100                         sffFilename = validParameter.validFile(parameters, "sff", false);
101                         if (sffFilename == "not found") { sffFilename = "";  }
102                         else { 
103                                 m->splitAtDash(sffFilename, filenames);
104                                 
105                                 //go through files and make sure they are good, if not, then disregard them
106                                 for (int i = 0; i < filenames.size(); i++) {
107                                         if (inputDir != "") {
108                                                 string path = m->hasPath(filenames[i]);
109                                                 //if the user has not given a path then, add inputdir. else leave path alone.
110                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
111                                         }
112         
113                                         ifstream in;
114                                         int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
115                                 
116                                         //if you can't open it, try default location
117                                         if (ableToOpen == 1) {
118                                                 if (m->getDefaultPath() != "") { //default path is set
119                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
120                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
121                                                         ifstream in2;
122                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
123                                                         in2.close();
124                                                         filenames[i] = tryPath;
125                                                 }
126                                         }
127                                         
128                                         //if you can't open it, try default location
129                                         if (ableToOpen == 1) {
130                                                 if (m->getOutputDir() != "") { //default path is set
131                                                         string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
132                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
133                                                         ifstream in2;
134                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
135                                                         in2.close();
136                                                         filenames[i] = tryPath;
137                                                 }
138                                         }
139                                         
140                                         in.close();
141                                         
142                                         if (ableToOpen == 1) { 
143                                                 m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
144                                                 //erase from file list
145                                                 filenames.erase(filenames.begin()+i);
146                                                 i--;
147                                         }
148                                 }
149                                 
150                                 //make sure there is at least one valid file left
151                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
152                         }
153                         
154                         accnosName = validParameter.validFile(parameters, "accnos", false);
155                         if (accnosName == "not found") { accnosName = "";  }
156                         else { 
157                                 hasAccnos = true;
158                                 m->splitAtDash(accnosName, accnosFileNames);
159                                 
160                                 //go through files and make sure they are good, if not, then disregard them
161                                 for (int i = 0; i < accnosFileNames.size(); i++) {
162                                         if (inputDir != "") {
163                                                 string path = m->hasPath(accnosFileNames[i]);
164                                                 //if the user has not given a path then, add inputdir. else leave path alone.
165                                                 if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
166                                         }
167         
168                                         ifstream in;
169                                         int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
170                                 
171                                         //if you can't open it, try default location
172                                         if (ableToOpen == 1) {
173                                                 if (m->getDefaultPath() != "") { //default path is set
174                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
175                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
176                                                         ifstream in2;
177                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
178                                                         in2.close();
179                                                         accnosFileNames[i] = tryPath;
180                                                 }
181                                         }
182                                         //if you can't open it, try default location
183                                         if (ableToOpen == 1) {
184                                                 if (m->getOutputDir() != "") { //default path is set
185                                                         string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
186                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
187                                                         ifstream in2;
188                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                         in2.close();
190                                                         accnosFileNames[i] = tryPath;
191                                                 }
192                                         }
193                                         in.close();
194                                         
195                                         if (ableToOpen == 1) { 
196                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
197                                                 //erase from file list
198                                                 accnosFileNames.erase(accnosFileNames.begin()+i);
199                                                 i--;
200                                         }
201                                 }
202                                 
203                                 //make sure there is at least one valid file left
204                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
205                         }
206                         
207                         if (hasAccnos) {
208                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
209                         }
210                         
211                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
212                         qual = m->isTrue(temp); 
213                         
214                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
215                         fasta = m->isTrue(temp); 
216                         
217                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
218                         flow = m->isTrue(temp); 
219                         
220                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
221                         trim = m->isTrue(temp); 
222                         
223                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
224                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
225                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
226                         else {
227                                 //you are a filename
228                                 if (inputDir != "") {
229                                         map<string,string>::iterator it = parameters.find("sfftxt");
230                                         //user has given a template file
231                                         if(it != parameters.end()){ 
232                                                 string path = m->hasPath(it->second);
233                                                 //if the user has not given a path then, add inputdir. else leave path alone.
234                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
235                                         }
236                                 }
237                                 
238                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
239                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
240                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
241                         }
242                         
243                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
244                 }
245         }
246         catch(exception& e) {
247                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
248                 exit(1);
249         }
250 }
251 //**********************************************************************************************************************
252
253 void SffInfoCommand::help(){
254         try {
255                 m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n");
256                 m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n");
257                 m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n");
258                 m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n");
259                 m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n");
260                 m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n");
261                 m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n");
262                 m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n");
263                 m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n");
264                 m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n");
265                 m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n");
266                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
267         }
268         catch(exception& e) {
269                 m->errorOut(e, "SffInfoCommand", "help");
270                 exit(1);
271         }
272 }
273 //**********************************************************************************************************************
274
275 SffInfoCommand::~SffInfoCommand(){}
276
277 //**********************************************************************************************************************
278 int SffInfoCommand::execute(){
279         try {
280                 
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282                 
283                 for (int s = 0; s < filenames.size(); s++) {
284                         
285                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
286                         
287                         int start = time(NULL);
288                         
289                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
290                         
291                         string accnos = "";
292                         if (hasAccnos) { accnos = accnosFileNames[s]; }
293                         
294                         int numReads = extractSffInfo(filenames[s], accnos);
295
296                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
297                 }
298                 
299                 if (sfftxtFilename != "") {  parseSffTxt(); }
300                 
301                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
302                 
303                 //report output filenames
304                 m->mothurOutEndLine();
305                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
306                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
307                 m->mothurOutEndLine();
308
309                 return 0;
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "SffInfoCommand", "execute");
313                 exit(1);
314         }
315 }
316 //**********************************************************************************************************************
317 int SffInfoCommand::extractSffInfo(string input, string accnos){
318         try {
319                 
320                 if (outputDir == "") {  outputDir += m->hasPath(input); }
321                 
322                 if (accnos != "")       {  readAccnosFile(accnos);  }
323                 else                            {       seqNames.clear();               }
324
325                 ofstream outSfftxt, outFasta, outQual, outFlow;
326                 string outFastaFileName, outQualFileName;
327                 string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
328                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
329                 if (trim) {
330                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
331                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
332                 }else{
333                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
334                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
335                 }
336                 
337                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
338                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
339                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
340                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
341                 
342                 ifstream in;
343                 in.open(input.c_str(), ios::binary);
344                 
345                 CommonHeader header; 
346                 readCommonHeader(in, header);
347                 
348                 int count = 0;
349                 
350                 //check magic number and version
351                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
352                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
353         
354                 //print common header
355                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
356                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
357                         
358                 //read through the sff file
359                 while (!in.eof()) {
360                         
361                         bool print = true;
362                         
363                         //read header
364                         Header readheader;
365                         readHeader(in, readheader);
366                         
367                         //read data
368                         seqRead read; 
369                         readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
370                                 
371                         //if you have provided an accosfile and this seq is not in it, then dont print
372                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
373                         
374                         //print 
375                         if (print) {
376                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
377                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
378                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
379                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
380                         }
381                         
382                         count++;
383                 
384                         //report progress
385                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
386                 
387                         if (m->control_pressed) { count = 0; break;   }
388                         
389                         if (count >= header.numReads) { break; }
390                 }
391                 
392                 //report progress
393                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
394                 
395                 in.close();
396                 
397                 if (sfftxt) {  outSfftxt.close();       }
398                 if (fasta)      {  outFasta.close();    }
399                 if (qual)       {  outQual.close();             }
400                 if (flow)       {  outFlow.close();             }
401                 
402                 return count;
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
406                 exit(1);
407         }
408 }
409 //**********************************************************************************************************************
410 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
411         try {
412
413                 if (!in.eof()) {
414
415                         //read magic number
416                         char buffer[4];
417                         in.read(buffer, 4);
418                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
419                 
420                         //read version
421                         char buffer9[4];
422                         in.read(buffer9, 4);
423                         header.version = "";
424                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i])); }
425                                 
426                         //read offset
427                         char buffer2 [8];
428                         in.read(buffer2, 8);
429                         header.indexOffset =  be_int8(*(unsigned long int *)(&buffer2));
430                         
431                         //read index length
432                         char buffer3 [4];
433                         in.read(buffer3, 4);
434                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
435                         
436                         //read num reads
437                         char buffer4 [4];
438                         in.read(buffer4, 4);
439                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
440                                 
441                         //read header length
442                         char buffer5 [2];
443                         in.read(buffer5, 2);
444                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
445                                         
446                         //read key length
447                         char buffer6 [2];
448                         in.read(buffer6, 2);
449                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
450                         
451                         //read number of flow reads
452                         char buffer7 [2];
453                         in.read(buffer7, 2);
454                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
455                                 
456                         //read format code
457                         char buffer8 [1];
458                         in.read(buffer8, 1);
459                         header.flogramFormatCode = (int)(buffer8[0]);
460                         
461                         //read flow chars
462                         char* tempBuffer = new char[header.numFlowsPerRead];
463                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
464                         header.flowChars = tempBuffer;
465                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
466                         delete[] tempBuffer;
467                         
468                         //read key
469                         char* tempBuffer2 = new char[header.keyLength];
470                         in.read(&(*tempBuffer2), header.keyLength);
471                         header.keySequence = tempBuffer2;
472                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
473                         delete[] tempBuffer2;
474                                 
475                         /* Pad to 8 chars */
476                         unsigned long int spotInFile = in.tellg();
477                         unsigned long int spot = (spotInFile + 7)& ~7;  // ~ inverts
478                         in.seekg(spot);
479                         
480                 }else{
481                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
482                 }
483
484                 return 0;
485         }
486         catch(exception& e) {
487                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
488                 exit(1);
489         }
490 }
491 //**********************************************************************************************************************
492 int SffInfoCommand::readHeader(ifstream& in, Header& header){
493         try {
494         
495                 if (!in.eof()) {
496                         
497                         //read header length
498                         char buffer [2];
499                         in.read(buffer, 2);
500                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
501                                                 
502                         //read name length
503                         char buffer2 [2];
504                         in.read(buffer2, 2);
505                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
506
507                         //read num bases
508                         char buffer3 [4];
509                         in.read(buffer3, 4);
510                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
511                         
512                         //read clip qual left
513                         char buffer4 [2];
514                         in.read(buffer4, 2);
515                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
516                         header.clipQualLeft = 5; 
517                         
518                         //read clip qual right
519                         char buffer5 [2];
520                         in.read(buffer5, 2);
521                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
522                         
523                         //read clipAdapterLeft
524                         char buffer6 [2];
525                         in.read(buffer6, 2);
526                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
527
528                         //read clipAdapterRight
529                         char buffer7 [2];
530                         in.read(buffer7, 2);
531                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
532                 
533                         //read name
534                         char* tempBuffer = new char[header.nameLength];
535                         in.read(&(*tempBuffer), header.nameLength);
536                         header.name = tempBuffer;
537                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
538                         delete[] tempBuffer;
539                         
540                         //extract info from name
541                         decodeName(header.timestamp, header.region, header.xy, header.name);
542                         
543                         /* Pad to 8 chars */
544                         unsigned long int spotInFile = in.tellg();
545                         unsigned long int spot = (spotInFile + 7)& ~7;
546                         in.seekg(spot);
547                         
548                 }else{
549                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
550                 }
551
552                 return 0;
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "SffInfoCommand", "readHeader");
556                 exit(1);
557         }
558 }
559 //**********************************************************************************************************************
560 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
561         try {
562         
563                 if (!in.eof()) {
564         
565                         //read flowgram
566                         read.flowgram.resize(numFlowReads);
567                         for (int i = 0; i < numFlowReads; i++) {  
568                                 char buffer [2];
569                                 in.read(buffer, 2);
570                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
571                         }
572         
573                         //read flowIndex
574                         read.flowIndex.resize(numBases);
575                         for (int i = 0; i < numBases; i++) {  
576                                 char temp[1];
577                                 in.read(temp, 1);
578                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
579                         }
580         
581                         //read bases
582                         char* tempBuffer = new char[numBases];
583                         in.read(&(*tempBuffer), numBases);
584                         read.bases = tempBuffer;
585                         if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases);  }
586                         delete[] tempBuffer;
587
588                         //read qual scores
589                         read.qualScores.resize(numBases);
590                         for (int i = 0; i < numBases; i++) {  
591                                 char temp[1];
592                                 in.read(temp, 1);
593                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
594                         }
595         
596                         /* Pad to 8 chars */
597                         unsigned long int spotInFile = in.tellg();
598                         unsigned long int spot = (spotInFile + 7)& ~7;
599                         in.seekg(spot);
600                         
601                 }else{
602                         m->mothurOut("Error reading."); m->mothurOutEndLine();
603                 }
604
605                 return 0;
606         }
607         catch(exception& e) {
608                 m->errorOut(e, "SffInfoCommand", "readSeqData");
609                 exit(1);
610         }
611 }
612 //**********************************************************************************************************************
613 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
614         try {
615                 
616                 string time = name.substr(0, 6);
617                 unsigned int timeNum = m->fromBase36(time);
618                         
619                 int q1 = timeNum / 60;
620                 int sec = timeNum - 60 * q1;
621                 int q2 = q1 / 60;
622                 int minute = q1 - 60 * q2;
623                 int q3 = q2 / 24;
624                 int hr = q2 - 24 * q3;
625                 int q4 = q3 / 32;
626                 int day = q3 - 32 * q4;
627                 int q5 = q4 / 13;
628                 int mon = q4 - 13 * q5;
629                 int year = 2000 + q5;
630                 
631                 timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
632                 
633                 region = name.substr(7, 2);
634                 
635                 string xyNum = name.substr(9);
636                 unsigned int myXy = m->fromBase36(xyNum);
637                 int x = myXy >> 12;
638                 int y = myXy & 4095;
639                 
640                 xy = toString(x) + "_" + toString(y);
641                         
642                 return 0;
643         }
644         catch(exception& e) {
645                 m->errorOut(e, "SffInfoCommand", "decodeName");
646                 exit(1);
647         }
648 }
649 //**********************************************************************************************************************
650 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
651         try {
652         
653                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
654                 out << "Version: " << header.version << endl;
655                 out << "Index Offset: " << header.indexOffset << endl;
656                 out << "Index Length: " << header.indexLength << endl;
657                 out << "Number of Reads: " << header.numReads << endl;
658                 out << "Header Length: " << header.headerLength << endl;
659                 out << "Key Length: " << header.keyLength << endl;
660                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
661                 out << "Format Code: " << header.flogramFormatCode << endl;
662                 out << "Flow Chars: " << header.flowChars << endl;
663                 out << "Key Sequence: " << header.keySequence << endl << endl;
664                         
665                 return 0;
666         }
667         catch(exception& e) {
668                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
669                 exit(1);
670         }
671 }
672 //**********************************************************************************************************************
673 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
674         try {
675                 
676                 out << ">" << header.name << endl;
677                 out << "Run Prefix: " << header.timestamp << endl;
678                 out << "Region #:  " << header.region << endl;
679                 out << "XY Location: " << header.xy << endl << endl;
680                 
681                 out << "Run Name:  " << endl;
682                 out << "Analysis Name:  " << endl;
683                 out << "Full Path: " << endl << endl;
684                 
685                 out << "Read Header Len: " << header.headerLength << endl;
686                 out << "Name Length: " << header.nameLength << endl;
687                 out << "# of Bases: " << header.numBases << endl;
688                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
689                 out << "Clip Qual Right: " << header.clipQualRight << endl;
690                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
691                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
692                 
693                 return 0;
694         }
695         catch(exception& e) {
696                 m->errorOut(e, "SffInfoCommand", "printHeader");
697                 exit(1);
698         }
699 }
700
701 //**********************************************************************************************************************
702 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
703         try {
704                 
705                 out << "Flowgram: ";
706                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
707                 
708                 out << endl <<  "Flow Indexes: ";
709                 int sum = 0;
710                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
711                 
712                 //make the bases you want to clip lowercase and the bases you want to keep upper case
713                 if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
714                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
715                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
716                 for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
717                 
718                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
719                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
720         
721                 
722                 out << endl << endl;
723                 
724                 return 0;
725         }
726         catch(exception& e) {
727                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
728                 exit(1);
729         }
730 }
731 //**********************************************************************************************************************
732 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
733         try {
734                 
735                 string seq = read.bases;
736                 
737                 if (trim) {
738                         if(header.clipQualRight < header.clipQualLeft){
739                                 seq = "NNNN";
740                         }
741                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
742                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
743                         }
744                         else {
745                                 seq = seq.substr(header.clipQualLeft-1);
746                         }
747                 }else{
748                         //if you wanted the sfftxt then you already converted the bases to the right case
749                         if (!sfftxt) {
750                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
751                                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
752                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
753                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
754                                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
755                         }
756                 }
757                 
758                 out << ">" << header.name  << " xy=" << header.xy << endl;
759                 out << seq << endl;
760                 
761                 return 0;
762         }
763         catch(exception& e) {
764                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
765                 exit(1);
766         }
767 }
768
769 //**********************************************************************************************************************
770 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
771         try {
772                 
773                 if (trim) {
774                         if(header.clipQualRight < header.clipQualLeft){
775                                 out << "0\t0\t0\t0";
776                         }
777                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
778                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
779                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
780                         }
781                         else{
782                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
783                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
784                         }
785                 }else{
786                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
787                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
788                 }
789                 
790                 out << endl;
791                 
792                 return 0;
793         }
794         catch(exception& e) {
795                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
796                 exit(1);
797         }
798 }
799
800 //**********************************************************************************************************************
801 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
802         try {
803                 if(header.clipQualRight > header.clipQualLeft){
804                         
805                         int rightIndex = 0;
806                         for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
807
808                         out << header.name << ' ' << rightIndex;
809                         for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
810                         out << endl;
811                 }
812                 
813                 
814                 return 0;
815         }
816         catch(exception& e) {
817                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
818                 exit(1);
819         }
820 }
821 //**********************************************************************************************************************
822 int SffInfoCommand::readAccnosFile(string filename) {
823         try {
824                 //remove old names
825                 seqNames.clear();
826                 
827                 ifstream in;
828                 m->openInputFile(filename, in);
829                 string name;
830                 
831                 while(!in.eof()){
832                         in >> name; m->gobble(in);
833                                                 
834                         seqNames.insert(name);
835                         
836                         if (m->control_pressed) { seqNames.clear(); break; }
837                 }
838                 in.close();             
839                 
840                 return 0;
841         }
842         catch(exception& e) {
843                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
844                 exit(1);
845         }
846 }
847 //**********************************************************************************************************************
848 int SffInfoCommand::parseSffTxt() {
849         try {
850                 
851                 ifstream inSFF;
852                 m->openInputFile(sfftxtFilename, inSFF);
853                 
854                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
855                 
856                 //output file names
857                 ofstream outFasta, outQual, outFlow;
858                 string outFastaFileName, outQualFileName;
859                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
860                 if (trim) {
861                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
862                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
863                 }else{
864                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
865                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
866                 }
867                 
868                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
869                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
870                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
871                 
872                 //read common header
873                 string commonHeader = m->getline(inSFF);
874                 string magicNumber = m->getline(inSFF); 
875                 string version = m->getline(inSFF);
876                 string indexOffset = m->getline(inSFF);
877                 string indexLength = m->getline(inSFF);
878                 int numReads = parseHeaderLineToInt(inSFF);
879                 string headerLength = m->getline(inSFF);
880                 string keyLength = m->getline(inSFF);
881                 int numFlows = parseHeaderLineToInt(inSFF);
882                 string flowgramCode = m->getline(inSFF);
883                 string flowChars = m->getline(inSFF);
884                 string keySequence = m->getline(inSFF);
885                 m->gobble(inSFF);
886                 
887                 string seqName;
888                 
889                 if (flow)       {       outFlow << numFlows << endl;    }
890                 
891                 for(int i=0;i<numReads;i++){
892                         
893                         //sanity check
894                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
895                         
896                         Header header;
897                         
898                         //parse read header
899                         inSFF >> seqName;
900                         seqName = seqName.substr(1);
901                         m->gobble(inSFF);
902                         header.name = seqName;
903                         
904                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
905                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
906                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
907                         m->gobble(inSFF);
908                                 
909                         string runName = parseHeaderLineToString(inSFF);
910                         string analysisName = parseHeaderLineToString(inSFF);
911                         string fullPath = parseHeaderLineToString(inSFF);
912                         m->gobble(inSFF);
913                         
914                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
915                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
916                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
917                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
918                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
919                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
920                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
921                         m->gobble(inSFF);
922                                 
923                         seqRead read;
924                         
925                         //parse read
926                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
927                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
928                         
929                         //adjust for print
930                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
931                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
932                         read.flowIndex = flowIndicesAdjusted;
933                         
934                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
935                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
936                         m->gobble(inSFF);
937                                         
938                         //if you have provided an accosfile and this seq is not in it, then dont print
939                         bool print = true;
940                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
941                         
942                         //print 
943                         if (print) {
944                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
945                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
946                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
947                         }
948                         
949                         //report progress
950                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
951                         
952                         if (m->control_pressed) {  break;  }
953                 }
954                 
955                 //report progress
956                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
957                 
958                 inSFF.close();
959                 
960                 if (fasta)      {  outFasta.close();    }
961                 if (qual)       {  outQual.close();             }
962                 if (flow)       {  outFlow.close();             }
963                 
964                 return 0;
965         }
966         catch(exception& e) {
967                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
968                 exit(1);
969         }
970 }
971 //**********************************************************************************************************************
972
973 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
974         try {
975                 int number;
976                 
977                 while (!file.eof())     {
978                         
979                         char c = file.get(); 
980                         if (c == ':'){
981                                 file >> number;
982                                 break;
983                         }
984                         
985                 }
986                 m->gobble(file);
987                 return number;
988         }
989         catch(exception& e) {
990                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
991                 exit(1);
992         }
993         
994 }
995
996 //**********************************************************************************************************************
997
998 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
999         try {
1000                 string text;
1001                 
1002                 while (!file.eof())     {
1003                         char c = file.get(); 
1004                         
1005                         if (c == ':'){
1006                                 //m->gobble(file);
1007                                 //text = m->getline(file);      
1008                                 file >> text;
1009                                 break;
1010                         }
1011                 }
1012                 m->gobble(file);
1013                 
1014                 return text;
1015         }
1016         catch(exception& e) {
1017                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1018                 exit(1);
1019         }
1020 }
1021
1022 //**********************************************************************************************************************
1023
1024 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1025         try {
1026                 vector<unsigned short> floatVector(length);
1027                 
1028                 while (!file.eof())     {
1029                         char c = file.get(); 
1030                         if (c == ':'){
1031                                 float temp;
1032                                 for(int i=0;i<length;i++){
1033                                         file >> temp;
1034                                         floatVector[i] = temp * 100;
1035                                 }
1036                                 break;
1037                         }
1038                 }
1039                 m->gobble(file);        
1040                 return floatVector;
1041         }
1042         catch(exception& e) {
1043                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1044                 exit(1);
1045         }
1046 }
1047
1048 //**********************************************************************************************************************
1049
1050 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1051         try {
1052                 vector<unsigned int> intVector(length);
1053                 
1054                 while (!file.eof())     {
1055                         char c = file.get(); 
1056                         if (c == ':'){
1057                                 for(int i=0;i<length;i++){
1058                                         file >> intVector[i];
1059                                 }
1060                                 break;
1061                         }
1062                 }
1063                 m->gobble(file);        
1064                 return intVector;
1065         }
1066         catch(exception& e) {
1067                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1068                 exit(1);
1069         }
1070 }
1071
1072 //**********************************************************************************************************************
1073
1074
1075                                 
1076