]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
removed parse.sff command and made its functionality part of sffinfo command. fixed...
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14 vector<string> SffInfoCommand::getValidParameters(){    
15         try {
16                 string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "SffInfoCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 SffInfoCommand::SffInfoCommand(){       
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["fasta"] = tempOutNames;
32                 outputTypes["flow"] = tempOutNames;
33                 outputTypes["sfftxt"] = tempOutNames;
34                 outputTypes["qual"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> SffInfoCommand::getRequiredParameters(){ 
43         try {
44                 string Array[] =  {"sff", "sfftxt", "or"};
45                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "SffInfoCommand", "getRequiredParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 vector<string> SffInfoCommand::getRequiredFiles(){      
55         try {
56                 vector<string> myArray;
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "SffInfoCommand", "getRequiredFiles");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65
66 SffInfoCommand::SffInfoCommand(string option)  {
67         try {
68                 abort = false;
69                 hasAccnos = false;
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
77                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
78                         
79                         OptionParser parser(option);
80                         map<string, string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         //check to make sure all parameters are valid for command
84                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["fasta"] = tempOutNames;
91                         outputTypes["flow"] = tempOutNames;
92                         outputTypes["sfftxt"] = tempOutNames;
93                         outputTypes["qual"] = tempOutNames;
94                         
95                         //if the user changes the output directory command factory will send this info to us in the output parameter 
96                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
97                         
98                         //if the user changes the input directory command factory will send this info to us in the output parameter 
99                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
100
101                         sffFilename = validParameter.validFile(parameters, "sff", false);
102                         if (sffFilename == "not found") { sffFilename = "";  }
103                         else { 
104                                 m->splitAtDash(sffFilename, filenames);
105                                 
106                                 //go through files and make sure they are good, if not, then disregard them
107                                 for (int i = 0; i < filenames.size(); i++) {
108                                         if (inputDir != "") {
109                                                 string path = m->hasPath(filenames[i]);
110                                                 //if the user has not given a path then, add inputdir. else leave path alone.
111                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
112                                         }
113         
114                                         ifstream in;
115                                         int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
116                                 
117                                         //if you can't open it, try default location
118                                         if (ableToOpen == 1) {
119                                                 if (m->getDefaultPath() != "") { //default path is set
120                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
121                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
122                                                         ifstream in2;
123                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
124                                                         in2.close();
125                                                         filenames[i] = tryPath;
126                                                 }
127                                         }
128                                         
129                                         //if you can't open it, try default location
130                                         if (ableToOpen == 1) {
131                                                 if (m->getOutputDir() != "") { //default path is set
132                                                         string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
133                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
134                                                         ifstream in2;
135                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
136                                                         in2.close();
137                                                         filenames[i] = tryPath;
138                                                 }
139                                         }
140                                         
141                                         in.close();
142                                         
143                                         if (ableToOpen == 1) { 
144                                                 m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
145                                                 //erase from file list
146                                                 filenames.erase(filenames.begin()+i);
147                                                 i--;
148                                         }
149                                 }
150                                 
151                                 //make sure there is at least one valid file left
152                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
153                         }
154                         
155                         accnosName = validParameter.validFile(parameters, "accnos", false);
156                         if (accnosName == "not found") { accnosName = "";  }
157                         else { 
158                                 hasAccnos = true;
159                                 m->splitAtDash(accnosName, accnosFileNames);
160                                 
161                                 //go through files and make sure they are good, if not, then disregard them
162                                 for (int i = 0; i < accnosFileNames.size(); i++) {
163                                         if (inputDir != "") {
164                                                 string path = m->hasPath(accnosFileNames[i]);
165                                                 //if the user has not given a path then, add inputdir. else leave path alone.
166                                                 if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
167                                         }
168         
169                                         ifstream in;
170                                         int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
171                                 
172                                         //if you can't open it, try default location
173                                         if (ableToOpen == 1) {
174                                                 if (m->getDefaultPath() != "") { //default path is set
175                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
176                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
177                                                         ifstream in2;
178                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
179                                                         in2.close();
180                                                         accnosFileNames[i] = tryPath;
181                                                 }
182                                         }
183                                         //if you can't open it, try default location
184                                         if (ableToOpen == 1) {
185                                                 if (m->getOutputDir() != "") { //default path is set
186                                                         string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
187                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
188                                                         ifstream in2;
189                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190                                                         in2.close();
191                                                         accnosFileNames[i] = tryPath;
192                                                 }
193                                         }
194                                         in.close();
195                                         
196                                         if (ableToOpen == 1) { 
197                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
198                                                 //erase from file list
199                                                 accnosFileNames.erase(accnosFileNames.begin()+i);
200                                                 i--;
201                                         }
202                                 }
203                                 
204                                 //make sure there is at least one valid file left
205                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
206                         }
207                         
208                         if (hasAccnos) {
209                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
210                         }
211                         
212                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
213                         qual = m->isTrue(temp); 
214                         
215                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
216                         fasta = m->isTrue(temp); 
217                         
218                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
219                         flow = m->isTrue(temp); 
220                         
221                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
222                         trim = m->isTrue(temp); 
223                         
224                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
225                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
226                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
227                         else {
228                                 //you are a filename
229                                 if (inputDir != "") {
230                                         map<string,string>::iterator it = parameters.find("sfftxt");
231                                         //user has given a template file
232                                         if(it != parameters.end()){ 
233                                                 string path = m->hasPath(it->second);
234                                                 //if the user has not given a path then, add inputdir. else leave path alone.
235                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
236                                         }
237                                 }
238                                 
239                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
240                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
241                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
242                         }
243                         
244                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
245                 }
246         }
247         catch(exception& e) {
248                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
249                 exit(1);
250         }
251 }
252 //**********************************************************************************************************************
253
254 void SffInfoCommand::help(){
255         try {
256                 m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n");
257                 m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n");
258                 m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n");
259                 m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n");
260                 m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n");
261                 m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n");
262                 m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n");
263                 m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n");
264                 m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n");
265                 m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n");
266                 m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n");
267                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
268         }
269         catch(exception& e) {
270                 m->errorOut(e, "SffInfoCommand", "help");
271                 exit(1);
272         }
273 }
274 //**********************************************************************************************************************
275
276 SffInfoCommand::~SffInfoCommand(){}
277
278 //**********************************************************************************************************************
279 int SffInfoCommand::execute(){
280         try {
281                 
282                 if (abort == true) { return 0; }
283                 
284                 for (int s = 0; s < filenames.size(); s++) {
285                         
286                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
287                         
288                         int start = time(NULL);
289                         
290                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
291                         
292                         string accnos = "";
293                         if (hasAccnos) { accnos = accnosFileNames[s]; }
294                         
295                         int numReads = extractSffInfo(filenames[s], accnos);
296
297                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
298                 }
299                 
300                 if (sfftxtFilename != "") {  parseSffTxt(); }
301                 
302                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
303                 
304                 //report output filenames
305                 m->mothurOutEndLine();
306                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
307                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
308                 m->mothurOutEndLine();
309
310                 return 0;
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "SffInfoCommand", "execute");
314                 exit(1);
315         }
316 }
317 //**********************************************************************************************************************
318 int SffInfoCommand::extractSffInfo(string input, string accnos){
319         try {
320                 
321                 if (outputDir == "") {  outputDir += m->hasPath(input); }
322                 
323                 if (accnos != "")       {  readAccnosFile(accnos);  }
324                 else                            {       seqNames.clear();               }
325
326                 ofstream outSfftxt, outFasta, outQual, outFlow;
327                 string outFastaFileName, outQualFileName;
328                 string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
329                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
330                 if (trim) {
331                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
332                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
333                 }else{
334                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
335                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
336                 }
337                 
338                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
339                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
340                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
341                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
342                 
343                 ifstream in;
344                 in.open(input.c_str(), ios::binary);
345                 
346                 CommonHeader header; 
347                 readCommonHeader(in, header);
348                 
349                 int count = 0;
350                 
351                 //check magic number and version
352                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
353                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
354         
355                 //print common header
356                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
357                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
358                         
359                 //read through the sff file
360                 while (!in.eof()) {
361                         
362                         bool print = true;
363                         
364                         //read header
365                         Header readheader;
366                         readHeader(in, readheader);
367                         
368                         //read data
369                         seqRead read; 
370                         readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
371                                 
372                         //if you have provided an accosfile and this seq is not in it, then dont print
373                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
374                         
375                         //print 
376                         if (print) {
377                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
378                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
379                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
380                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
381                         }
382                         
383                         count++;
384                 
385                         //report progress
386                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
387                 
388                         if (m->control_pressed) { count = 0; break;   }
389                         
390                         if (count >= header.numReads) { break; }
391                 }
392                 
393                 //report progress
394                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
395                 
396                 in.close();
397                 
398                 if (sfftxt) {  outSfftxt.close();       }
399                 if (fasta)      {  outFasta.close();    }
400                 if (qual)       {  outQual.close();             }
401                 if (flow)       {  outFlow.close();             }
402                 
403                 return count;
404         }
405         catch(exception& e) {
406                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
407                 exit(1);
408         }
409 }
410 //**********************************************************************************************************************
411 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
412         try {
413
414                 if (!in.eof()) {
415
416                         //read magic number
417                         char buffer[4];
418                         in.read(buffer, 4);
419                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
420                 
421                         //read version
422                         char buffer9[4];
423                         in.read(buffer9, 4);
424                         header.version = "";
425                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i])); }
426                                 
427                         //read offset
428                         char buffer2 [8];
429                         in.read(buffer2, 8);
430                         header.indexOffset =  be_int8(*(unsigned long int *)(&buffer2));
431                         
432                         //read index length
433                         char buffer3 [4];
434                         in.read(buffer3, 4);
435                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
436                         
437                         //read num reads
438                         char buffer4 [4];
439                         in.read(buffer4, 4);
440                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
441                                 
442                         //read header length
443                         char buffer5 [2];
444                         in.read(buffer5, 2);
445                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
446                                         
447                         //read key length
448                         char buffer6 [2];
449                         in.read(buffer6, 2);
450                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
451                         
452                         //read number of flow reads
453                         char buffer7 [2];
454                         in.read(buffer7, 2);
455                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
456                                 
457                         //read format code
458                         char buffer8 [1];
459                         in.read(buffer8, 1);
460                         header.flogramFormatCode = (int)(buffer8[0]);
461                         
462                         //read flow chars
463                         char* tempBuffer = new char[header.numFlowsPerRead];
464                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
465                         header.flowChars = tempBuffer;
466                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
467                         delete[] tempBuffer;
468                         
469                         //read key
470                         char* tempBuffer2 = new char[header.keyLength];
471                         in.read(&(*tempBuffer2), header.keyLength);
472                         header.keySequence = tempBuffer2;
473                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
474                         delete[] tempBuffer2;
475                                 
476                         /* Pad to 8 chars */
477                         unsigned long int spotInFile = in.tellg();
478                         unsigned long int spot = (spotInFile + 7)& ~7;  // ~ inverts
479                         in.seekg(spot);
480                         
481                 }else{
482                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
483                 }
484
485                 return 0;
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
489                 exit(1);
490         }
491 }
492 //**********************************************************************************************************************
493 int SffInfoCommand::readHeader(ifstream& in, Header& header){
494         try {
495         
496                 if (!in.eof()) {
497                         
498                         //read header length
499                         char buffer [2];
500                         in.read(buffer, 2);
501                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
502                                                 
503                         //read name length
504                         char buffer2 [2];
505                         in.read(buffer2, 2);
506                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
507
508                         //read num bases
509                         char buffer3 [4];
510                         in.read(buffer3, 4);
511                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
512                         
513                         //read clip qual left
514                         char buffer4 [2];
515                         in.read(buffer4, 2);
516                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
517                         header.clipQualLeft = 5; 
518                         
519                         //read clip qual right
520                         char buffer5 [2];
521                         in.read(buffer5, 2);
522                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
523                         
524                         //read clipAdapterLeft
525                         char buffer6 [2];
526                         in.read(buffer6, 2);
527                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
528
529                         //read clipAdapterRight
530                         char buffer7 [2];
531                         in.read(buffer7, 2);
532                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
533                 
534                         //read name
535                         char* tempBuffer = new char[header.nameLength];
536                         in.read(&(*tempBuffer), header.nameLength);
537                         header.name = tempBuffer;
538                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
539                         delete[] tempBuffer;
540                         
541                         //extract info from name
542                         decodeName(header.timestamp, header.region, header.xy, header.name);
543                         
544                         /* Pad to 8 chars */
545                         unsigned long int spotInFile = in.tellg();
546                         unsigned long int spot = (spotInFile + 7)& ~7;
547                         in.seekg(spot);
548                         
549                 }else{
550                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
551                 }
552
553                 return 0;
554         }
555         catch(exception& e) {
556                 m->errorOut(e, "SffInfoCommand", "readHeader");
557                 exit(1);
558         }
559 }
560 //**********************************************************************************************************************
561 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
562         try {
563         
564                 if (!in.eof()) {
565         
566                         //read flowgram
567                         read.flowgram.resize(numFlowReads);
568                         for (int i = 0; i < numFlowReads; i++) {  
569                                 char buffer [2];
570                                 in.read(buffer, 2);
571                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
572                         }
573         
574                         //read flowIndex
575                         read.flowIndex.resize(numBases);
576                         for (int i = 0; i < numBases; i++) {  
577                                 char temp[1];
578                                 in.read(temp, 1);
579                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
580                         }
581         
582                         //read bases
583                         char* tempBuffer = new char[numBases];
584                         in.read(&(*tempBuffer), numBases);
585                         read.bases = tempBuffer;
586                         if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases);  }
587                         delete[] tempBuffer;
588
589                         //read qual scores
590                         read.qualScores.resize(numBases);
591                         for (int i = 0; i < numBases; i++) {  
592                                 char temp[1];
593                                 in.read(temp, 1);
594                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
595                         }
596         
597                         /* Pad to 8 chars */
598                         unsigned long int spotInFile = in.tellg();
599                         unsigned long int spot = (spotInFile + 7)& ~7;
600                         in.seekg(spot);
601                         
602                 }else{
603                         m->mothurOut("Error reading."); m->mothurOutEndLine();
604                 }
605
606                 return 0;
607         }
608         catch(exception& e) {
609                 m->errorOut(e, "SffInfoCommand", "readSeqData");
610                 exit(1);
611         }
612 }
613 //**********************************************************************************************************************
614 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
615         try {
616                 
617                 string time = name.substr(0, 6);
618                 unsigned int timeNum = m->fromBase36(time);
619                         
620                 int q1 = timeNum / 60;
621                 int sec = timeNum - 60 * q1;
622                 int q2 = q1 / 60;
623                 int minute = q1 - 60 * q2;
624                 int q3 = q2 / 24;
625                 int hr = q2 - 24 * q3;
626                 int q4 = q3 / 32;
627                 int day = q3 - 32 * q4;
628                 int q5 = q4 / 13;
629                 int mon = q4 - 13 * q5;
630                 int year = 2000 + q5;
631                 
632                 timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
633                 
634                 region = name.substr(7, 2);
635                 
636                 string xyNum = name.substr(9);
637                 unsigned int myXy = m->fromBase36(xyNum);
638                 int x = myXy >> 12;
639                 int y = myXy & 4095;
640                 
641                 xy = toString(x) + "_" + toString(y);
642                         
643                 return 0;
644         }
645         catch(exception& e) {
646                 m->errorOut(e, "SffInfoCommand", "decodeName");
647                 exit(1);
648         }
649 }
650 //**********************************************************************************************************************
651 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
652         try {
653         
654                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
655                 out << "Version: " << header.version << endl;
656                 out << "Index Offset: " << header.indexOffset << endl;
657                 out << "Index Length: " << header.indexLength << endl;
658                 out << "Number of Reads: " << header.numReads << endl;
659                 out << "Header Length: " << header.headerLength << endl;
660                 out << "Key Length: " << header.keyLength << endl;
661                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
662                 out << "Format Code: " << header.flogramFormatCode << endl;
663                 out << "Flow Chars: " << header.flowChars << endl;
664                 out << "Key Sequence: " << header.keySequence << endl << endl;
665                         
666                 return 0;
667         }
668         catch(exception& e) {
669                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
670                 exit(1);
671         }
672 }
673 //**********************************************************************************************************************
674 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
675         try {
676                 
677                 out << ">" << header.name << endl;
678                 out << "Run Prefix: " << header.timestamp << endl;
679                 out << "Region #:  " << header.region << endl;
680                 out << "XY Location: " << header.xy << endl << endl;
681                 
682                 out << "Run Name:  " << endl;
683                 out << "Analysis Name:  " << endl;
684                 out << "Full Path: " << endl << endl;
685                 
686                 out << "Read Header Len: " << header.headerLength << endl;
687                 out << "Name Length: " << header.nameLength << endl;
688                 out << "# of Bases: " << header.numBases << endl;
689                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
690                 out << "Clip Qual Right: " << header.clipQualRight << endl;
691                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
692                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
693                 
694                 return 0;
695         }
696         catch(exception& e) {
697                 m->errorOut(e, "SffInfoCommand", "printHeader");
698                 exit(1);
699         }
700 }
701
702 //**********************************************************************************************************************
703 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
704         try {
705                 
706                 out << "Flowgram: ";
707                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
708                 
709                 out << endl <<  "Flow Indexes: ";
710                 int sum = 0;
711                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
712                 
713                 //make the bases you want to clip lowercase and the bases you want to keep upper case
714                 if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
715                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
716                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
717                 for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
718                 
719                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
720                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
721         
722                 
723                 out << endl << endl;
724                 
725                 return 0;
726         }
727         catch(exception& e) {
728                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
729                 exit(1);
730         }
731 }
732 //**********************************************************************************************************************
733 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
734         try {
735                 
736                 string seq = read.bases;
737                 
738                 if (trim) {
739                         if(header.clipQualRight < header.clipQualLeft){
740                                 seq = "NNNN";
741                         }
742                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
743                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
744                         }
745                         else {
746                                 seq = seq.substr(header.clipQualLeft-1);
747                         }
748                 }else{
749                         //if you wanted the sfftxt then you already converted the bases to the right case
750                         if (!sfftxt) {
751                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
752                                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
753                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
754                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
755                                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
756                         }
757                 }
758                 
759                 out << ">" << header.name  << " xy=" << header.xy << endl;
760                 out << seq << endl;
761                 
762                 return 0;
763         }
764         catch(exception& e) {
765                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
766                 exit(1);
767         }
768 }
769
770 //**********************************************************************************************************************
771 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
772         try {
773                 
774                 if (trim) {
775                         if(header.clipQualRight < header.clipQualLeft){
776                                 out << "0\t0\t0\t0";
777                         }
778                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
779                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
780                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
781                         }
782                         else{
783                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
784                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
785                         }
786                 }else{
787                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
788                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
789                 }
790                 
791                 out << endl;
792                 
793                 return 0;
794         }
795         catch(exception& e) {
796                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
797                 exit(1);
798         }
799 }
800
801 //**********************************************************************************************************************
802 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
803         try {
804                 if(header.clipQualRight > header.clipQualLeft){
805                         
806                         int rightIndex = 0;
807                         for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
808
809                         out << header.name << ' ' << rightIndex;
810                         for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
811                         out << endl;
812                 }
813                 
814                 
815                 return 0;
816         }
817         catch(exception& e) {
818                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
819                 exit(1);
820         }
821 }
822 //**********************************************************************************************************************
823 int SffInfoCommand::readAccnosFile(string filename) {
824         try {
825                 //remove old names
826                 seqNames.clear();
827                 
828                 ifstream in;
829                 m->openInputFile(filename, in);
830                 string name;
831                 
832                 while(!in.eof()){
833                         in >> name; m->gobble(in);
834                                                 
835                         seqNames.insert(name);
836                         
837                         if (m->control_pressed) { seqNames.clear(); break; }
838                 }
839                 in.close();             
840                 
841                 return 0;
842         }
843         catch(exception& e) {
844                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
845                 exit(1);
846         }
847 }
848 //**********************************************************************************************************************
849 int SffInfoCommand::parseSffTxt() {
850         try {
851                 
852                 ifstream inSFF;
853                 m->openInputFile(sfftxtFilename, inSFF);
854                 
855                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
856                 
857                 //output file names
858                 ofstream outFasta, outQual, outFlow;
859                 string outFastaFileName, outQualFileName;
860                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
861                 if (trim) {
862                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
863                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
864                 }else{
865                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
866                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
867                 }
868                 
869                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
870                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
871                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
872                 
873                 //read common header
874                 string commonHeader = m->getline(inSFF);
875                 string magicNumber = m->getline(inSFF); 
876                 string version = m->getline(inSFF);
877                 string indexOffset = m->getline(inSFF);
878                 string indexLength = m->getline(inSFF);
879                 int numReads = parseHeaderLineToInt(inSFF);
880                 string headerLength = m->getline(inSFF);
881                 string keyLength = m->getline(inSFF);
882                 int numFlows = parseHeaderLineToInt(inSFF);
883                 string flowgramCode = m->getline(inSFF);
884                 string flowChars = m->getline(inSFF);
885                 string keySequence = m->getline(inSFF);
886                 m->gobble(inSFF);
887                 
888                 string seqName;
889                 
890                 if (flow)       {       outFlow << numFlows << endl;    }
891                 
892                 for(int i=0;i<numReads;i++){
893                         
894                         //sanity check
895                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
896                         
897                         Header header;
898                         
899                         //parse read header
900                         inSFF >> seqName;
901                         seqName = seqName.substr(1);
902                         m->gobble(inSFF);
903                         header.name = seqName;
904                         
905                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
906                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
907                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
908                         m->gobble(inSFF);
909                                 
910                         string runName = parseHeaderLineToString(inSFF);
911                         string analysisName = parseHeaderLineToString(inSFF);
912                         string fullPath = parseHeaderLineToString(inSFF);
913                         m->gobble(inSFF);
914                         
915                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
916                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
917                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
918                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
919                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
920                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
921                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
922                         m->gobble(inSFF);
923                                 
924                         seqRead read;
925                         
926                         //parse read
927                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
928                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
929                         
930                         //adjust for print
931                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
932                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
933                         read.flowIndex = flowIndicesAdjusted;
934                         
935                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
936                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
937                         m->gobble(inSFF);
938                                         
939                         //if you have provided an accosfile and this seq is not in it, then dont print
940                         bool print = true;
941                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
942                         
943                         //print 
944                         if (print) {
945                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
946                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
947                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
948                         }
949                         
950                         //report progress
951                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
952                         
953                         if (m->control_pressed) {  break;  }
954                 }
955                 
956                 //report progress
957                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
958                 
959                 inSFF.close();
960                 
961                 if (fasta)      {  outFasta.close();    }
962                 if (qual)       {  outQual.close();             }
963                 if (flow)       {  outFlow.close();             }
964                 
965                 return 0;
966         }
967         catch(exception& e) {
968                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
969                 exit(1);
970         }
971 }
972 //**********************************************************************************************************************
973
974 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
975         try {
976                 int number;
977                 
978                 while (!file.eof())     {
979                         
980                         char c = file.get(); 
981                         if (c == ':'){
982                                 file >> number;
983                                 break;
984                         }
985                         
986                 }
987                 m->gobble(file);
988                 return number;
989         }
990         catch(exception& e) {
991                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
992                 exit(1);
993         }
994         
995 }
996
997 //**********************************************************************************************************************
998
999 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
1000         try {
1001                 string text;
1002                 
1003                 while (!file.eof())     {
1004                         char c = file.get(); 
1005                         
1006                         if (c == ':'){
1007                                 //m->gobble(file);
1008                                 //text = m->getline(file);      
1009                                 file >> text;
1010                                 break;
1011                         }
1012                 }
1013                 m->gobble(file);
1014                 
1015                 return text;
1016         }
1017         catch(exception& e) {
1018                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1019                 exit(1);
1020         }
1021 }
1022
1023 //**********************************************************************************************************************
1024
1025 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1026         try {
1027                 vector<unsigned short> floatVector(length);
1028                 
1029                 while (!file.eof())     {
1030                         char c = file.get(); 
1031                         if (c == ':'){
1032                                 float temp;
1033                                 for(int i=0;i<length;i++){
1034                                         file >> temp;
1035                                         floatVector[i] = temp * 100;
1036                                 }
1037                                 break;
1038                         }
1039                 }
1040                 m->gobble(file);        
1041                 return floatVector;
1042         }
1043         catch(exception& e) {
1044                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1045                 exit(1);
1046         }
1047 }
1048
1049 //**********************************************************************************************************************
1050
1051 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1052         try {
1053                 vector<unsigned int> intVector(length);
1054                 
1055                 while (!file.eof())     {
1056                         char c = file.get(); 
1057                         if (c == ':'){
1058                                 for(int i=0;i<length;i++){
1059                                         file >> intVector[i];
1060                                 }
1061                                 break;
1062                         }
1063                 }
1064                 m->gobble(file);        
1065                 return intVector;
1066         }
1067         catch(exception& e) {
1068                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1069                 exit(1);
1070         }
1071 }
1072
1073 //**********************************************************************************************************************
1074
1075
1076                                 
1077