]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
fixed clearcut version bug, added group count output to get.groups and remove.groups
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14 vector<string> SffInfoCommand::getValidParameters(){    
15         try {
16                 string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "SffInfoCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 SffInfoCommand::SffInfoCommand(){       
27         try {
28                 abort = true; calledHelp = true; 
29                 vector<string> tempOutNames;
30                 outputTypes["fasta"] = tempOutNames;
31                 outputTypes["flow"] = tempOutNames;
32                 outputTypes["sfftxt"] = tempOutNames;
33                 outputTypes["qfile"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SffInfoCommand::getRequiredParameters(){ 
42         try {
43                 string Array[] =  {"sff", "sfftxt", "or"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SffInfoCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SffInfoCommand::getRequiredFiles(){      
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SffInfoCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 SffInfoCommand::SffInfoCommand(string option)  {
66         try {
67                 abort = false; calledHelp = false;   
68                 hasAccnos = false;
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; calledHelp = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string, string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         //check to make sure all parameters are valid for command
83                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["fasta"] = tempOutNames;
90                         outputTypes["flow"] = tempOutNames;
91                         outputTypes["sfftxt"] = tempOutNames;
92                         outputTypes["qfile"] = tempOutNames;
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
99
100                         sffFilename = validParameter.validFile(parameters, "sff", false);
101                         if (sffFilename == "not found") { sffFilename = "";  }
102                         else { 
103                                 m->splitAtDash(sffFilename, filenames);
104                                 
105                                 //go through files and make sure they are good, if not, then disregard them
106                                 for (int i = 0; i < filenames.size(); i++) {
107                                         if (inputDir != "") {
108                                                 string path = m->hasPath(filenames[i]);
109                                                 //if the user has not given a path then, add inputdir. else leave path alone.
110                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
111                                         }
112         
113                                         ifstream in;
114                                         int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
115                                 
116                                         //if you can't open it, try default location
117                                         if (ableToOpen == 1) {
118                                                 if (m->getDefaultPath() != "") { //default path is set
119                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
120                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
121                                                         ifstream in2;
122                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
123                                                         in2.close();
124                                                         filenames[i] = tryPath;
125                                                 }
126                                         }
127                                         
128                                         //if you can't open it, try default location
129                                         if (ableToOpen == 1) {
130                                                 if (m->getOutputDir() != "") { //default path is set
131                                                         string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
132                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
133                                                         ifstream in2;
134                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
135                                                         in2.close();
136                                                         filenames[i] = tryPath;
137                                                 }
138                                         }
139                                         
140                                         in.close();
141                                         
142                                         if (ableToOpen == 1) { 
143                                                 m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
144                                                 //erase from file list
145                                                 filenames.erase(filenames.begin()+i);
146                                                 i--;
147                                         }
148                                 }
149                                 
150                                 //make sure there is at least one valid file left
151                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
152                         }
153                         
154                         accnosName = validParameter.validFile(parameters, "accnos", false);
155                         if (accnosName == "not found") { accnosName = "";  }
156                         else { 
157                                 hasAccnos = true;
158                                 m->splitAtDash(accnosName, accnosFileNames);
159                                 
160                                 //go through files and make sure they are good, if not, then disregard them
161                                 for (int i = 0; i < accnosFileNames.size(); i++) {
162                                         if (inputDir != "") {
163                                                 string path = m->hasPath(accnosFileNames[i]);
164                                                 //if the user has not given a path then, add inputdir. else leave path alone.
165                                                 if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
166                                         }
167         
168                                         ifstream in;
169                                         int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
170                                 
171                                         //if you can't open it, try default location
172                                         if (ableToOpen == 1) {
173                                                 if (m->getDefaultPath() != "") { //default path is set
174                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
175                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
176                                                         ifstream in2;
177                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
178                                                         in2.close();
179                                                         accnosFileNames[i] = tryPath;
180                                                 }
181                                         }
182                                         //if you can't open it, try default location
183                                         if (ableToOpen == 1) {
184                                                 if (m->getOutputDir() != "") { //default path is set
185                                                         string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
186                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
187                                                         ifstream in2;
188                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                         in2.close();
190                                                         accnosFileNames[i] = tryPath;
191                                                 }
192                                         }
193                                         in.close();
194                                         
195                                         if (ableToOpen == 1) { 
196                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
197                                                 //erase from file list
198                                                 accnosFileNames.erase(accnosFileNames.begin()+i);
199                                                 i--;
200                                         }
201                                 }
202                                 
203                                 //make sure there is at least one valid file left
204                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
205                         }
206                         
207                         if (hasAccnos) {
208                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
209                         }
210                         
211                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
212                         qual = m->isTrue(temp); 
213                         
214                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
215                         fasta = m->isTrue(temp); 
216                         
217                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
218                         flow = m->isTrue(temp); 
219                         
220                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
221                         trim = m->isTrue(temp); 
222                         
223                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
224                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
225                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
226                         else {
227                                 //you are a filename
228                                 if (inputDir != "") {
229                                         map<string,string>::iterator it = parameters.find("sfftxt");
230                                         //user has given a template file
231                                         if(it != parameters.end()){ 
232                                                 string path = m->hasPath(it->second);
233                                                 //if the user has not given a path then, add inputdir. else leave path alone.
234                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
235                                         }
236                                 }
237                                 
238                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
239                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
240                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
241                         }
242                         
243                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
244                 }
245         }
246         catch(exception& e) {
247                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
248                 exit(1);
249         }
250 }
251 //**********************************************************************************************************************
252
253 void SffInfoCommand::help(){
254         try {
255                 m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n");
256                 m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n");
257                 m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n");
258                 m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n");
259                 m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n");
260                 m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n");
261                 m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n");
262                 m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n");
263                 m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n");
264                 m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n");
265                 m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n");
266                 m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
267         }
268         catch(exception& e) {
269                 m->errorOut(e, "SffInfoCommand", "help");
270                 exit(1);
271         }
272 }
273 //**********************************************************************************************************************
274
275 SffInfoCommand::~SffInfoCommand(){}
276
277 //**********************************************************************************************************************
278 int SffInfoCommand::execute(){
279         try {
280                 
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282                 
283                 for (int s = 0; s < filenames.size(); s++) {
284                         
285                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
286                         
287                         int start = time(NULL);
288                         
289                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
290                         
291                         string accnos = "";
292                         if (hasAccnos) { accnos = accnosFileNames[s]; }
293                         
294                         int numReads = extractSffInfo(filenames[s], accnos);
295
296                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
297                 }
298                 
299                 if (sfftxtFilename != "") {  parseSffTxt(); }
300                 
301                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
302                 
303                 //set fasta file as new current fastafile
304                 string current = "";
305                 itTypes = outputTypes.find("fasta");
306                 if (itTypes != outputTypes.end()) {
307                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
308                 }
309                 
310                 itTypes = outputTypes.find("qfile");
311                 if (itTypes != outputTypes.end()) {
312                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
313                 }       
314                 
315                 //report output filenames
316                 m->mothurOutEndLine();
317                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
318                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
319                 m->mothurOutEndLine();
320
321                 return 0;
322         }
323         catch(exception& e) {
324                 m->errorOut(e, "SffInfoCommand", "execute");
325                 exit(1);
326         }
327 }
328 //**********************************************************************************************************************
329 int SffInfoCommand::extractSffInfo(string input, string accnos){
330         try {
331                 
332                 if (outputDir == "") {  outputDir += m->hasPath(input); }
333                 
334                 if (accnos != "")       {  readAccnosFile(accnos);  }
335                 else                            {       seqNames.clear();               }
336
337                 ofstream outSfftxt, outFasta, outQual, outFlow;
338                 string outFastaFileName, outQualFileName;
339                 string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
340                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
341                 if (trim) {
342                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
343                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
344                 }else{
345                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
346                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
347                 }
348                 
349                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
350                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
351                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
352                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
353                 
354                 ifstream in;
355                 in.open(input.c_str(), ios::binary);
356                 
357                 CommonHeader header; 
358                 readCommonHeader(in, header);
359                 
360                 int count = 0;
361                 
362                 //check magic number and version
363                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
364                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
365         
366                 //print common header
367                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
368                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
369                         
370                 //read through the sff file
371                 while (!in.eof()) {
372                         
373                         bool print = true;
374                         
375                         //read header
376                         Header readheader;
377                         readHeader(in, readheader);
378                         
379                         //read data
380                         seqRead read; 
381                         readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
382                                 
383                         //if you have provided an accosfile and this seq is not in it, then dont print
384                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
385                         
386                         //print 
387                         if (print) {
388                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
389                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
390                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
391                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
392                         }
393                         
394                         count++;
395                 
396                         //report progress
397                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
398                 
399                         if (m->control_pressed) { count = 0; break;   }
400                         
401                         if (count >= header.numReads) { break; }
402                 }
403                 
404                 //report progress
405                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
406                 
407                 in.close();
408                 
409                 if (sfftxt) {  outSfftxt.close();       }
410                 if (fasta)      {  outFasta.close();    }
411                 if (qual)       {  outQual.close();             }
412                 if (flow)       {  outFlow.close();             }
413                 
414                 return count;
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
418                 exit(1);
419         }
420 }
421 //**********************************************************************************************************************
422 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
423         try {
424
425                 if (!in.eof()) {
426
427                         //read magic number
428                         char buffer[4];
429                         in.read(buffer, 4);
430                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
431                 
432                         //read version
433                         char buffer9[4];
434                         in.read(buffer9, 4);
435                         header.version = "";
436                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i])); }
437                                 
438                         //read offset
439                         char buffer2 [8];
440                         in.read(buffer2, 8);
441                         header.indexOffset =  be_int8(*(unsigned long int *)(&buffer2));
442                         
443                         //read index length
444                         char buffer3 [4];
445                         in.read(buffer3, 4);
446                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
447                         
448                         //read num reads
449                         char buffer4 [4];
450                         in.read(buffer4, 4);
451                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
452                                 
453                         //read header length
454                         char buffer5 [2];
455                         in.read(buffer5, 2);
456                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
457                                         
458                         //read key length
459                         char buffer6 [2];
460                         in.read(buffer6, 2);
461                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
462                         
463                         //read number of flow reads
464                         char buffer7 [2];
465                         in.read(buffer7, 2);
466                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
467                                 
468                         //read format code
469                         char buffer8 [1];
470                         in.read(buffer8, 1);
471                         header.flogramFormatCode = (int)(buffer8[0]);
472                         
473                         //read flow chars
474                         char* tempBuffer = new char[header.numFlowsPerRead];
475                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
476                         header.flowChars = tempBuffer;
477                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
478                         delete[] tempBuffer;
479                         
480                         //read key
481                         char* tempBuffer2 = new char[header.keyLength];
482                         in.read(&(*tempBuffer2), header.keyLength);
483                         header.keySequence = tempBuffer2;
484                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
485                         delete[] tempBuffer2;
486                                 
487                         /* Pad to 8 chars */
488                         unsigned long int spotInFile = in.tellg();
489                         unsigned long int spot = (spotInFile + 7)& ~7;  // ~ inverts
490                         in.seekg(spot);
491                         
492                 }else{
493                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
494                 }
495
496                 return 0;
497         }
498         catch(exception& e) {
499                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
500                 exit(1);
501         }
502 }
503 //**********************************************************************************************************************
504 int SffInfoCommand::readHeader(ifstream& in, Header& header){
505         try {
506         
507                 if (!in.eof()) {
508                         
509                         //read header length
510                         char buffer [2];
511                         in.read(buffer, 2);
512                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
513                                                 
514                         //read name length
515                         char buffer2 [2];
516                         in.read(buffer2, 2);
517                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
518
519                         //read num bases
520                         char buffer3 [4];
521                         in.read(buffer3, 4);
522                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
523                         
524                         //read clip qual left
525                         char buffer4 [2];
526                         in.read(buffer4, 2);
527                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
528                         header.clipQualLeft = 5; 
529                         
530                         //read clip qual right
531                         char buffer5 [2];
532                         in.read(buffer5, 2);
533                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
534                         
535                         //read clipAdapterLeft
536                         char buffer6 [2];
537                         in.read(buffer6, 2);
538                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
539
540                         //read clipAdapterRight
541                         char buffer7 [2];
542                         in.read(buffer7, 2);
543                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
544                 
545                         //read name
546                         char* tempBuffer = new char[header.nameLength];
547                         in.read(&(*tempBuffer), header.nameLength);
548                         header.name = tempBuffer;
549                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
550                         delete[] tempBuffer;
551                         
552                         //extract info from name
553                         decodeName(header.timestamp, header.region, header.xy, header.name);
554                         
555                         /* Pad to 8 chars */
556                         unsigned long int spotInFile = in.tellg();
557                         unsigned long int spot = (spotInFile + 7)& ~7;
558                         in.seekg(spot);
559                         
560                 }else{
561                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
562                 }
563
564                 return 0;
565         }
566         catch(exception& e) {
567                 m->errorOut(e, "SffInfoCommand", "readHeader");
568                 exit(1);
569         }
570 }
571 //**********************************************************************************************************************
572 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
573         try {
574         
575                 if (!in.eof()) {
576         
577                         //read flowgram
578                         read.flowgram.resize(numFlowReads);
579                         for (int i = 0; i < numFlowReads; i++) {  
580                                 char buffer [2];
581                                 in.read(buffer, 2);
582                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
583                         }
584         
585                         //read flowIndex
586                         read.flowIndex.resize(numBases);
587                         for (int i = 0; i < numBases; i++) {  
588                                 char temp[1];
589                                 in.read(temp, 1);
590                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
591                         }
592         
593                         //read bases
594                         char* tempBuffer = new char[numBases];
595                         in.read(&(*tempBuffer), numBases);
596                         read.bases = tempBuffer;
597                         if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases);  }
598                         delete[] tempBuffer;
599
600                         //read qual scores
601                         read.qualScores.resize(numBases);
602                         for (int i = 0; i < numBases; i++) {  
603                                 char temp[1];
604                                 in.read(temp, 1);
605                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
606                         }
607         
608                         /* Pad to 8 chars */
609                         unsigned long int spotInFile = in.tellg();
610                         unsigned long int spot = (spotInFile + 7)& ~7;
611                         in.seekg(spot);
612                         
613                 }else{
614                         m->mothurOut("Error reading."); m->mothurOutEndLine();
615                 }
616
617                 return 0;
618         }
619         catch(exception& e) {
620                 m->errorOut(e, "SffInfoCommand", "readSeqData");
621                 exit(1);
622         }
623 }
624 //**********************************************************************************************************************
625 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
626         try {
627                 
628                 if (name.length() >= 6) {
629                         string time = name.substr(0, 6);
630                         unsigned int timeNum = m->fromBase36(time);
631                         
632                         int q1 = timeNum / 60;
633                         int sec = timeNum - 60 * q1;
634                         int q2 = q1 / 60;
635                         int minute = q1 - 60 * q2;
636                         int q3 = q2 / 24;
637                         int hr = q2 - 24 * q3;
638                         int q4 = q3 / 32;
639                         int day = q3 - 32 * q4;
640                         int q5 = q4 / 13;
641                         int mon = q4 - 13 * q5;
642                         int year = 2000 + q5;
643                 
644                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
645                 }
646                 
647                 if (name.length() >= 9) {
648                         region = name.substr(7, 2);
649                 
650                         string xyNum = name.substr(9);
651                         unsigned int myXy = m->fromBase36(xyNum);
652                         int x = myXy >> 12;
653                         int y = myXy & 4095;
654                 
655                         xy = toString(x) + "_" + toString(y);
656                 }
657                 
658                 return 0;
659         }
660         catch(exception& e) {
661                 m->errorOut(e, "SffInfoCommand", "decodeName");
662                 exit(1);
663         }
664 }
665 //**********************************************************************************************************************
666 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
667         try {
668         
669                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
670                 out << "Version: " << header.version << endl;
671                 out << "Index Offset: " << header.indexOffset << endl;
672                 out << "Index Length: " << header.indexLength << endl;
673                 out << "Number of Reads: " << header.numReads << endl;
674                 out << "Header Length: " << header.headerLength << endl;
675                 out << "Key Length: " << header.keyLength << endl;
676                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
677                 out << "Format Code: " << header.flogramFormatCode << endl;
678                 out << "Flow Chars: " << header.flowChars << endl;
679                 out << "Key Sequence: " << header.keySequence << endl << endl;
680                         
681                 return 0;
682         }
683         catch(exception& e) {
684                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
685                 exit(1);
686         }
687 }
688 //**********************************************************************************************************************
689 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
690         try {
691                 
692                 out << ">" << header.name << endl;
693                 out << "Run Prefix: " << header.timestamp << endl;
694                 out << "Region #:  " << header.region << endl;
695                 out << "XY Location: " << header.xy << endl << endl;
696                 
697                 out << "Run Name:  " << endl;
698                 out << "Analysis Name:  " << endl;
699                 out << "Full Path: " << endl << endl;
700                 
701                 out << "Read Header Len: " << header.headerLength << endl;
702                 out << "Name Length: " << header.nameLength << endl;
703                 out << "# of Bases: " << header.numBases << endl;
704                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
705                 out << "Clip Qual Right: " << header.clipQualRight << endl;
706                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
707                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
708                 
709                 return 0;
710         }
711         catch(exception& e) {
712                 m->errorOut(e, "SffInfoCommand", "printHeader");
713                 exit(1);
714         }
715 }
716
717 //**********************************************************************************************************************
718 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
719         try {
720                 
721                 out << "Flowgram: ";
722                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
723                 
724                 out << endl <<  "Flow Indexes: ";
725                 int sum = 0;
726                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
727                 
728                 //make the bases you want to clip lowercase and the bases you want to keep upper case
729                 if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
730                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
731                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
732                 for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
733                 
734                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
735                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
736         
737                 
738                 out << endl << endl;
739                 
740                 return 0;
741         }
742         catch(exception& e) {
743                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
744                 exit(1);
745         }
746 }
747 //**********************************************************************************************************************
748 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
749         try {
750                 
751                 string seq = read.bases;
752                 
753                 if (trim) {
754                         if(header.clipQualRight < header.clipQualLeft){
755                                 seq = "NNNN";
756                         }
757                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
758                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
759                         }
760                         else {
761                                 seq = seq.substr(header.clipQualLeft-1);
762                         }
763                 }else{
764                         //if you wanted the sfftxt then you already converted the bases to the right case
765                         if (!sfftxt) {
766                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
767                                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
768                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
769                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
770                                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
771                         }
772                 }
773                 
774                 out << ">" << header.name  << " xy=" << header.xy << endl;
775                 out << seq << endl;
776                 
777                 return 0;
778         }
779         catch(exception& e) {
780                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
781                 exit(1);
782         }
783 }
784
785 //**********************************************************************************************************************
786 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
787         try {
788                 
789                 if (trim) {
790                         if(header.clipQualRight < header.clipQualLeft){
791                                 out << "0\t0\t0\t0";
792                         }
793                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
794                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
795                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
796                         }
797                         else{
798                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
799                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
800                         }
801                 }else{
802                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
803                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
804                 }
805                 
806                 out << endl;
807                 
808                 return 0;
809         }
810         catch(exception& e) {
811                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
812                 exit(1);
813         }
814 }
815
816 //**********************************************************************************************************************
817 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
818         try {
819                 if(header.clipQualRight > header.clipQualLeft){
820                         
821                         int rightIndex = 0;
822                         for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
823
824                         out << header.name << ' ' << rightIndex;
825                         for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
826                         out << endl;
827                 }
828                 
829                 
830                 return 0;
831         }
832         catch(exception& e) {
833                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
834                 exit(1);
835         }
836 }
837 //**********************************************************************************************************************
838 int SffInfoCommand::readAccnosFile(string filename) {
839         try {
840                 //remove old names
841                 seqNames.clear();
842                 
843                 ifstream in;
844                 m->openInputFile(filename, in);
845                 string name;
846                 
847                 while(!in.eof()){
848                         in >> name; m->gobble(in);
849                                                 
850                         seqNames.insert(name);
851                         
852                         if (m->control_pressed) { seqNames.clear(); break; }
853                 }
854                 in.close();             
855                 
856                 return 0;
857         }
858         catch(exception& e) {
859                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
860                 exit(1);
861         }
862 }
863 //**********************************************************************************************************************
864 int SffInfoCommand::parseSffTxt() {
865         try {
866                 
867                 ifstream inSFF;
868                 m->openInputFile(sfftxtFilename, inSFF);
869                 
870                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
871                 
872                 //output file names
873                 ofstream outFasta, outQual, outFlow;
874                 string outFastaFileName, outQualFileName;
875                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
876                 if (trim) {
877                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
878                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
879                 }else{
880                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
881                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
882                 }
883                 
884                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
885                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
886                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
887                 
888                 //read common header
889                 string commonHeader = m->getline(inSFF);
890                 string magicNumber = m->getline(inSFF); 
891                 string version = m->getline(inSFF);
892                 string indexOffset = m->getline(inSFF);
893                 string indexLength = m->getline(inSFF);
894                 int numReads = parseHeaderLineToInt(inSFF);
895                 string headerLength = m->getline(inSFF);
896                 string keyLength = m->getline(inSFF);
897                 int numFlows = parseHeaderLineToInt(inSFF);
898                 string flowgramCode = m->getline(inSFF);
899                 string flowChars = m->getline(inSFF);
900                 string keySequence = m->getline(inSFF);
901                 m->gobble(inSFF);
902                 
903                 string seqName;
904                 
905                 if (flow)       {       outFlow << numFlows << endl;    }
906                 
907                 for(int i=0;i<numReads;i++){
908                         
909                         //sanity check
910                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
911                         
912                         Header header;
913                         
914                         //parse read header
915                         inSFF >> seqName;
916                         seqName = seqName.substr(1);
917                         m->gobble(inSFF);
918                         header.name = seqName;
919                         
920                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
921                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
922                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
923                         m->gobble(inSFF);
924                                 
925                         string runName = parseHeaderLineToString(inSFF);
926                         string analysisName = parseHeaderLineToString(inSFF);
927                         string fullPath = parseHeaderLineToString(inSFF);
928                         m->gobble(inSFF);
929                         
930                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
931                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
932                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
933                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
934                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
935                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
936                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
937                         m->gobble(inSFF);
938                                 
939                         seqRead read;
940                         
941                         //parse read
942                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
943                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
944                         
945                         //adjust for print
946                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
947                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
948                         read.flowIndex = flowIndicesAdjusted;
949                         
950                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
951                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
952                         m->gobble(inSFF);
953                                         
954                         //if you have provided an accosfile and this seq is not in it, then dont print
955                         bool print = true;
956                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
957                         
958                         //print 
959                         if (print) {
960                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
961                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
962                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
963                         }
964                         
965                         //report progress
966                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
967                         
968                         if (m->control_pressed) {  break;  }
969                 }
970                 
971                 //report progress
972                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
973                 
974                 inSFF.close();
975                 
976                 if (fasta)      {  outFasta.close();    }
977                 if (qual)       {  outQual.close();             }
978                 if (flow)       {  outFlow.close();             }
979                 
980                 return 0;
981         }
982         catch(exception& e) {
983                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
984                 exit(1);
985         }
986 }
987 //**********************************************************************************************************************
988
989 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
990         try {
991                 int number;
992                 
993                 while (!file.eof())     {
994                         
995                         char c = file.get(); 
996                         if (c == ':'){
997                                 file >> number;
998                                 break;
999                         }
1000                         
1001                 }
1002                 m->gobble(file);
1003                 return number;
1004         }
1005         catch(exception& e) {
1006                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
1007                 exit(1);
1008         }
1009         
1010 }
1011
1012 //**********************************************************************************************************************
1013
1014 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
1015         try {
1016                 string text;
1017                 
1018                 while (!file.eof())     {
1019                         char c = file.get(); 
1020                         
1021                         if (c == ':'){
1022                                 //m->gobble(file);
1023                                 //text = m->getline(file);      
1024                                 file >> text;
1025                                 break;
1026                         }
1027                 }
1028                 m->gobble(file);
1029                 
1030                 return text;
1031         }
1032         catch(exception& e) {
1033                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1034                 exit(1);
1035         }
1036 }
1037
1038 //**********************************************************************************************************************
1039
1040 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1041         try {
1042                 vector<unsigned short> floatVector(length);
1043                 
1044                 while (!file.eof())     {
1045                         char c = file.get(); 
1046                         if (c == ':'){
1047                                 float temp;
1048                                 for(int i=0;i<length;i++){
1049                                         file >> temp;
1050                                         floatVector[i] = temp * 100;
1051                                 }
1052                                 break;
1053                         }
1054                 }
1055                 m->gobble(file);        
1056                 return floatVector;
1057         }
1058         catch(exception& e) {
1059                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1060                 exit(1);
1061         }
1062 }
1063
1064 //**********************************************************************************************************************
1065
1066 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1067         try {
1068                 vector<unsigned int> intVector(length);
1069                 
1070                 while (!file.eof())     {
1071                         char c = file.get(); 
1072                         if (c == ':'){
1073                                 for(int i=0;i<length;i++){
1074                                         file >> intVector[i];
1075                                 }
1076                                 break;
1077                         }
1078                 }
1079                 m->gobble(file);        
1080                 return intVector;
1081         }
1082         catch(exception& e) {
1083                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1084                 exit(1);
1085         }
1086 }
1087
1088 //**********************************************************************************************************************
1089
1090
1091                                 
1092