]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
added current as option is lists of file names, processors now outputted with current...
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14 vector<string> SffInfoCommand::setParameters(){ 
15         try {           
16                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
17                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
18                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt);
19                 CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow);
20                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
21                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
22                 CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SffInfoCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SffInfoCommand::getHelpString(){ 
37         try {
38                 string helpString = "";
39                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
40                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n";
41                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
42                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
43                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
44                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n";
45                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
46                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
47                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
48                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
49                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
50                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";
51                 return helpString;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "SffInfoCommand", "getHelpString");
55                 exit(1);
56         }
57 }
58
59
60 //**********************************************************************************************************************
61 SffInfoCommand::SffInfoCommand(){       
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["fasta"] = tempOutNames;
67                 outputTypes["flow"] = tempOutNames;
68                 outputTypes["sfftxt"] = tempOutNames;
69                 outputTypes["qfile"] = tempOutNames;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
73                 exit(1);
74         }
75 }
76 //**********************************************************************************************************************
77
78 SffInfoCommand::SffInfoCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;   
81                 hasAccnos = false;
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 
86                 else {
87                         //valid paramters for this command
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string, string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["fasta"] = tempOutNames;
102                         outputTypes["flow"] = tempOutNames;
103                         outputTypes["sfftxt"] = tempOutNames;
104                         outputTypes["qfile"] = tempOutNames;
105                         
106                         //if the user changes the output directory command factory will send this info to us in the output parameter 
107                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
108                         
109                         //if the user changes the input directory command factory will send this info to us in the output parameter 
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
111
112                         sffFilename = validParameter.validFile(parameters, "sff", false);
113                         if (sffFilename == "not found") { sffFilename = "";  }
114                         else { 
115                                 m->splitAtDash(sffFilename, filenames);
116                                 
117                                 //go through files and make sure they are good, if not, then disregard them
118                                 for (int i = 0; i < filenames.size(); i++) {
119                                         bool ignore = false;
120                                         if (filenames[i] == "current") { 
121                                                 filenames[i] = m->getSFFFile(); 
122                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }
123                                                 else {  
124                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
125                                                         //erase from file list
126                                                         filenames.erase(filenames.begin()+i);
127                                                         i--;
128                                                 }
129                                         }
130                                         
131                                         if (!ignore) {
132                                                 if (inputDir != "") {
133                                                         string path = m->hasPath(filenames[i]);
134                                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }
136                                                 }
137                 
138                                                 ifstream in;
139                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
140                                         
141                                                 //if you can't open it, try default location
142                                                 if (ableToOpen == 1) {
143                                                         if (m->getDefaultPath() != "") { //default path is set
144                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
145                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
146                                                                 ifstream in2;
147                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
148                                                                 in2.close();
149                                                                 filenames[i] = tryPath;
150                                                         }
151                                                 }
152                                                 
153                                                 //if you can't open it, try default location
154                                                 if (ableToOpen == 1) {
155                                                         if (m->getOutputDir() != "") { //default path is set
156                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
157                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
158                                                                 ifstream in2;
159                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
160                                                                 in2.close();
161                                                                 filenames[i] = tryPath;
162                                                         }
163                                                 }
164                                                 
165                                                 in.close();
166                                                 
167                                                 if (ableToOpen == 1) { 
168                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
169                                                         //erase from file list
170                                                         filenames.erase(filenames.begin()+i);
171                                                         i--;
172                                                 }
173                                         }
174                                 }
175                                 
176                                 //make sure there is at least one valid file left
177                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
178                         }
179                         
180                         accnosName = validParameter.validFile(parameters, "accnos", false);
181                         if (accnosName == "not found") { accnosName = "";  }
182                         else { 
183                                 hasAccnos = true;
184                                 m->splitAtDash(accnosName, accnosFileNames);
185                                 
186                                 //go through files and make sure they are good, if not, then disregard them
187                                 for (int i = 0; i < accnosFileNames.size(); i++) {
188                                         bool ignore = false;
189                                         if (accnosFileNames[i] == "current") { 
190                                                 accnosFileNames[i] = m->getAccnosFile(); 
191                                                 if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
192                                                 else {  
193                                                         m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
194                                                         //erase from file list
195                                                         accnosFileNames.erase(accnosFileNames.begin()+i);
196                                                         i--;
197                                                 }
198                                         }
199                                         
200                                         if (!ignore) {
201                                         
202                                                 if (inputDir != "") {
203                                                         string path = m->hasPath(accnosFileNames[i]);
204                                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                                         if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
206                                                 }
207                 
208                                                 ifstream in;
209                                                 int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
210                                         
211                                                 //if you can't open it, try default location
212                                                 if (ableToOpen == 1) {
213                                                         if (m->getDefaultPath() != "") { //default path is set
214                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
215                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
216                                                                 ifstream in2;
217                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
218                                                                 in2.close();
219                                                                 accnosFileNames[i] = tryPath;
220                                                         }
221                                                 }
222                                                 //if you can't open it, try default location
223                                                 if (ableToOpen == 1) {
224                                                         if (m->getOutputDir() != "") { //default path is set
225                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
226                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
227                                                                 ifstream in2;
228                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
229                                                                 in2.close();
230                                                                 accnosFileNames[i] = tryPath;
231                                                         }
232                                                 }
233                                                 in.close();
234                                                 
235                                                 if (ableToOpen == 1) { 
236                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
237                                                         //erase from file list
238                                                         accnosFileNames.erase(accnosFileNames.begin()+i);
239                                                         i--;
240                                                 }
241                                         }
242                                 }
243                                 
244                                 //make sure there is at least one valid file left
245                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
246                         }
247                         
248                         if (hasAccnos) {
249                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
250                         }
251                         
252                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
253                         qual = m->isTrue(temp); 
254                         
255                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
256                         fasta = m->isTrue(temp); 
257                         
258                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
259                         flow = m->isTrue(temp); 
260                         
261                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
262                         trim = m->isTrue(temp); 
263                         
264                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
265                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
266                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
267                         else {
268                                 //you are a filename
269                                 if (inputDir != "") {
270                                         map<string,string>::iterator it = parameters.find("sfftxt");
271                                         //user has given a template file
272                                         if(it != parameters.end()){ 
273                                                 string path = m->hasPath(it->second);
274                                                 //if the user has not given a path then, add inputdir. else leave path alone.
275                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
276                                         }
277                                 }
278                                 
279                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
280                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
281                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
282                         }
283                         
284                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  
285                                 //if there is a current fasta file, use it
286                                 string filename = m->getSFFFile(); 
287                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
288                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }
289                         }
290                 }
291         }
292         catch(exception& e) {
293                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
294                 exit(1);
295         }
296 }
297 //**********************************************************************************************************************
298 int SffInfoCommand::execute(){
299         try {
300                 
301                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
302                 
303                 for (int s = 0; s < filenames.size(); s++) {
304                         
305                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
306                         
307                         int start = time(NULL);
308                         
309                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
310                         
311                         string accnos = "";
312                         if (hasAccnos) { accnos = accnosFileNames[s]; }
313                         
314                         int numReads = extractSffInfo(filenames[s], accnos);
315
316                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
317                 }
318                 
319                 if (sfftxtFilename != "") {  parseSffTxt(); }
320                 
321                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
322                 
323                 //set fasta file as new current fastafile
324                 string current = "";
325                 itTypes = outputTypes.find("fasta");
326                 if (itTypes != outputTypes.end()) {
327                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
328                 }
329                 
330                 itTypes = outputTypes.find("qfile");
331                 if (itTypes != outputTypes.end()) {
332                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
333                 }
334                 
335                 itTypes = outputTypes.find("flow");
336                 if (itTypes != outputTypes.end()) {
337                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }
338                 }
339                 
340                 //report output filenames
341                 m->mothurOutEndLine();
342                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
343                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
344                 m->mothurOutEndLine();
345
346                 return 0;
347         }
348         catch(exception& e) {
349                 m->errorOut(e, "SffInfoCommand", "execute");
350                 exit(1);
351         }
352 }
353 //**********************************************************************************************************************
354 int SffInfoCommand::extractSffInfo(string input, string accnos){
355         try {
356                 
357                 if (outputDir == "") {  outputDir += m->hasPath(input); }
358                 
359                 if (accnos != "")       {  readAccnosFile(accnos);  }
360                 else                            {       seqNames.clear();               }
361
362                 ofstream outSfftxt, outFasta, outQual, outFlow;
363                 string outFastaFileName, outQualFileName;
364                 string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
365                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
366                 if (trim) {
367                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
368                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
369                 }else{
370                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
371                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
372                 }
373                 
374                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
375                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
376                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
377                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
378                 
379                 ifstream in;
380                 in.open(input.c_str(), ios::binary);
381                 
382                 CommonHeader header; 
383                 readCommonHeader(in, header);
384         
385                 int count = 0;
386                 mycount = 0;
387                 
388                 //check magic number and version
389                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
390                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
391         
392                 //print common header
393                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
394                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
395                         
396                 //read through the sff file
397                 while (!in.eof()) {
398                         
399                         bool print = true;
400                         
401                         //read header
402                         Header readheader;
403                         readHeader(in, readheader);
404                         
405                         //read data
406                         seqRead read; 
407                         readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
408                                 
409                         //if you have provided an accosfile and this seq is not in it, then dont print
410                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
411                         
412                         //print 
413                         if (print) {
414                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
415                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
416                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
417                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
418                         }
419                         
420                         count++;
421                         mycount++;
422                 
423                         //report progress
424                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
425                 
426                         if (m->control_pressed) { count = 0; break;   }
427                         
428                         if (count >= header.numReads) { break; }
429                 }
430                 
431                 //report progress
432                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
433                 
434                 in.close();
435                 
436                 if (sfftxt) {  outSfftxt.close();       }
437                 if (fasta)      {  outFasta.close();    }
438                 if (qual)       {  outQual.close();             }
439                 if (flow)       {  outFlow.close();             }
440                 
441                 return count;
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
445                 exit(1);
446         }
447 }
448 //**********************************************************************************************************************
449 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
450         try {
451
452                 if (!in.eof()) {
453
454                         //read magic number
455                         char buffer[4];
456                         in.read(buffer, 4);
457                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
458                 
459                         //read version
460                         char buffer9[4];
461                         in.read(buffer9, 4);
462                         header.version = "";
463                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i])); }
464                                 
465                         //read offset
466                         char buffer2 [8];
467                         in.read(buffer2, 8);
468                         header.indexOffset =  be_int8(*(unsigned long int *)(&buffer2));
469                         
470                         //read index length
471                         char buffer3 [4];
472                         in.read(buffer3, 4);
473                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
474                         
475                         //read num reads
476                         char buffer4 [4];
477                         in.read(buffer4, 4);
478                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
479                                 
480                         //read header length
481                         char buffer5 [2];
482                         in.read(buffer5, 2);
483                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
484                                         
485                         //read key length
486                         char buffer6 [2];
487                         in.read(buffer6, 2);
488                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
489                         
490                         //read number of flow reads
491                         char buffer7 [2];
492                         in.read(buffer7, 2);
493                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
494                                 
495                         //read format code
496                         char buffer8 [1];
497                         in.read(buffer8, 1);
498                         header.flogramFormatCode = (int)(buffer8[0]);
499                         
500                         //read flow chars
501                         char* tempBuffer = new char[header.numFlowsPerRead];
502                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
503                         header.flowChars = tempBuffer;
504                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
505                         delete[] tempBuffer;
506                         
507                         //read key
508                         char* tempBuffer2 = new char[header.keyLength];
509                         in.read(&(*tempBuffer2), header.keyLength);
510                         header.keySequence = tempBuffer2;
511                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
512                         delete[] tempBuffer2;
513                                 
514                         /* Pad to 8 chars */
515                         unsigned long int spotInFile = in.tellg();
516                         unsigned long int spot = (spotInFile + 7)& ~7;  // ~ inverts
517                         in.seekg(spot);
518                         
519                 }else{
520                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
521                 }
522
523                 return 0;
524         }
525         catch(exception& e) {
526                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
527                 exit(1);
528         }
529 }
530 //**********************************************************************************************************************
531 int SffInfoCommand::readHeader(ifstream& in, Header& header){
532         try {
533         
534                 if (!in.eof()) {
535                         
536                         //read header length
537                         char buffer [2];
538                         in.read(buffer, 2);
539                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
540                                                 
541                         //read name length
542                         char buffer2 [2];
543                         in.read(buffer2, 2);
544                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
545
546                         //read num bases
547                         char buffer3 [4];
548                         in.read(buffer3, 4);
549                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
550                         
551                         //read clip qual left
552                         char buffer4 [2];
553                         in.read(buffer4, 2);
554                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
555                         header.clipQualLeft = 5; 
556                         
557                         //read clip qual right
558                         char buffer5 [2];
559                         in.read(buffer5, 2);
560                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
561                         
562                         //read clipAdapterLeft
563                         char buffer6 [2];
564                         in.read(buffer6, 2);
565                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
566
567                         //read clipAdapterRight
568                         char buffer7 [2];
569                         in.read(buffer7, 2);
570                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
571                 
572                         //read name
573                         char* tempBuffer = new char[header.nameLength];
574                         in.read(&(*tempBuffer), header.nameLength);
575                         header.name = tempBuffer;
576                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
577                         delete[] tempBuffer;
578                         
579                         //extract info from name
580                         decodeName(header.timestamp, header.region, header.xy, header.name);
581                         
582                         /* Pad to 8 chars */
583                         unsigned long int spotInFile = in.tellg();
584                         unsigned long int spot = (spotInFile + 7)& ~7;
585                         in.seekg(spot);
586                         
587                 }else{
588                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
589                 }
590
591                 return 0;
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "SffInfoCommand", "readHeader");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
600         try {
601         
602                 if (!in.eof()) {
603         
604                         //read flowgram
605                         read.flowgram.resize(numFlowReads);
606                         for (int i = 0; i < numFlowReads; i++) {  
607                                 char buffer [2];
608                                 in.read(buffer, 2);
609                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
610                         }
611         
612                         //read flowIndex
613                         read.flowIndex.resize(numBases);
614                         for (int i = 0; i < numBases; i++) {  
615                                 char temp[1];
616                                 in.read(temp, 1);
617                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
618                         }
619         
620                         //read bases
621                         char* tempBuffer = new char[numBases];
622                         in.read(&(*tempBuffer), numBases);
623                         read.bases = tempBuffer;
624                         if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases);  }
625                         delete[] tempBuffer;
626
627                         //read qual scores
628                         read.qualScores.resize(numBases);
629                         for (int i = 0; i < numBases; i++) {  
630                                 char temp[1];
631                                 in.read(temp, 1);
632                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
633                         }
634         
635                         /* Pad to 8 chars */
636                         unsigned long int spotInFile = in.tellg();
637                         unsigned long int spot = (spotInFile + 7)& ~7;
638                         in.seekg(spot);
639                         
640                 }else{
641                         m->mothurOut("Error reading."); m->mothurOutEndLine();
642                 }
643
644                 return 0;
645         }
646         catch(exception& e) {
647                 m->errorOut(e, "SffInfoCommand", "readSeqData");
648                 exit(1);
649         }
650 }
651 //**********************************************************************************************************************
652 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
653         try {
654                 
655                 if (name.length() >= 6) {
656                         string time = name.substr(0, 6);
657                         unsigned int timeNum = m->fromBase36(time);
658                         
659                         int q1 = timeNum / 60;
660                         int sec = timeNum - 60 * q1;
661                         int q2 = q1 / 60;
662                         int minute = q1 - 60 * q2;
663                         int q3 = q2 / 24;
664                         int hr = q2 - 24 * q3;
665                         int q4 = q3 / 32;
666                         int day = q3 - 32 * q4;
667                         int q5 = q4 / 13;
668                         int mon = q4 - 13 * q5;
669                         int year = 2000 + q5;
670                 
671                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
672                 }
673                 
674                 if (name.length() >= 9) {
675                         region = name.substr(7, 2);
676                 
677                         string xyNum = name.substr(9);
678                         unsigned int myXy = m->fromBase36(xyNum);
679                         int x = myXy >> 12;
680                         int y = myXy & 4095;
681                 
682                         xy = toString(x) + "_" + toString(y);
683                 }
684                 
685                 return 0;
686         }
687         catch(exception& e) {
688                 m->errorOut(e, "SffInfoCommand", "decodeName");
689                 exit(1);
690         }
691 }
692 //**********************************************************************************************************************
693 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
694         try {
695         
696                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
697                 out << "Version: " << header.version << endl;
698                 out << "Index Offset: " << header.indexOffset << endl;
699                 out << "Index Length: " << header.indexLength << endl;
700                 out << "Number of Reads: " << header.numReads << endl;
701                 out << "Header Length: " << header.headerLength << endl;
702                 out << "Key Length: " << header.keyLength << endl;
703                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
704                 out << "Format Code: " << header.flogramFormatCode << endl;
705                 out << "Flow Chars: " << header.flowChars << endl;
706                 out << "Key Sequence: " << header.keySequence << endl << endl;
707                         
708                 return 0;
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
712                 exit(1);
713         }
714 }
715 //**********************************************************************************************************************
716 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
717         try {
718                 
719                 out << ">" << header.name << endl;
720                 out << "Run Prefix: " << header.timestamp << endl;
721                 out << "Region #:  " << header.region << endl;
722                 out << "XY Location: " << header.xy << endl << endl;
723                 
724                 out << "Run Name:  " << endl;
725                 out << "Analysis Name:  " << endl;
726                 out << "Full Path: " << endl << endl;
727                 
728                 out << "Read Header Len: " << header.headerLength << endl;
729                 out << "Name Length: " << header.nameLength << endl;
730                 out << "# of Bases: " << header.numBases << endl;
731                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
732                 out << "Clip Qual Right: " << header.clipQualRight << endl;
733                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
734                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
735                 
736                 return 0;
737         }
738         catch(exception& e) {
739                 m->errorOut(e, "SffInfoCommand", "printHeader");
740                 exit(1);
741         }
742 }
743
744 //**********************************************************************************************************************
745 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
746         try {
747                 
748                 out << "Flowgram: ";
749                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
750                 
751                 out << endl <<  "Flow Indexes: ";
752                 int sum = 0;
753                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
754                 
755                 //make the bases you want to clip lowercase and the bases you want to keep upper case
756                 if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
757                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
758                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
759                 for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
760                 
761                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
762                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
763         
764                 
765                 out << endl << endl;
766                 
767                 return 0;
768         }
769         catch(exception& e) {
770                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
771                 exit(1);
772         }
773 }
774 //**********************************************************************************************************************
775 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
776         try {
777                 
778                 string seq = read.bases;
779                 
780                 if (trim) {
781                         if(header.clipQualRight < header.clipQualLeft){
782                                 seq = "NNNN";
783                         }
784                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
785                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
786                         }
787                         else {
788                                 seq = seq.substr(header.clipQualLeft-1);
789                         }
790                 }else{
791                         //if you wanted the sfftxt then you already converted the bases to the right case
792                         if (!sfftxt) {
793                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
794                                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
795                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
796                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
797                                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
798                         }
799                 }
800                 
801                 out << ">" << header.name  << " xy=" << header.xy << endl;
802                 out << seq << endl;
803                 
804                 return 0;
805         }
806         catch(exception& e) {
807                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
808                 exit(1);
809         }
810 }
811
812 //**********************************************************************************************************************
813 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
814         try {
815                 
816                 if (trim) {
817                         if(header.clipQualRight < header.clipQualLeft){
818                                 out << ">" << header.name << " xy=" << header.xy << endl;
819                                 out << "0\t0\t0\t0";
820                         }
821                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
822                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
823                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
824                         }
825                         else{
826                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
827                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
828                         }
829                 }else{
830                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
831                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
832                 }
833                 
834                 out << endl;
835                 
836                 return 0;
837         }
838         catch(exception& e) {
839                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
840                 exit(1);
841         }
842 }
843
844 //**********************************************************************************************************************
845 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
846         try {
847                 if(header.clipQualRight > header.clipQualLeft){
848                         
849                         int rightIndex = 0;
850                         for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
851
852                         out << header.name << ' ' << rightIndex;
853                         for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
854                         out << endl;
855                 }
856                 
857                 
858                 return 0;
859         }
860         catch(exception& e) {
861                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
862                 exit(1);
863         }
864 }
865 //**********************************************************************************************************************
866 int SffInfoCommand::readAccnosFile(string filename) {
867         try {
868                 //remove old names
869                 seqNames.clear();
870                 
871                 ifstream in;
872                 m->openInputFile(filename, in);
873                 string name;
874                 
875                 while(!in.eof()){
876                         in >> name; m->gobble(in);
877                                                 
878                         seqNames.insert(name);
879                         
880                         if (m->control_pressed) { seqNames.clear(); break; }
881                 }
882                 in.close();             
883                 
884                 return 0;
885         }
886         catch(exception& e) {
887                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
888                 exit(1);
889         }
890 }
891 //**********************************************************************************************************************
892 int SffInfoCommand::parseSffTxt() {
893         try {
894                 
895                 ifstream inSFF;
896                 m->openInputFile(sfftxtFilename, inSFF);
897                 
898                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
899                 
900                 //output file names
901                 ofstream outFasta, outQual, outFlow;
902                 string outFastaFileName, outQualFileName;
903                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
904                 if (trim) {
905                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
906                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
907                 }else{
908                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
909                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
910                 }
911                 
912                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
913                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
914                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
915                 
916                 //read common header
917                 string commonHeader = m->getline(inSFF);
918                 string magicNumber = m->getline(inSFF); 
919                 string version = m->getline(inSFF);
920                 string indexOffset = m->getline(inSFF);
921                 string indexLength = m->getline(inSFF);
922                 int numReads = parseHeaderLineToInt(inSFF);
923                 string headerLength = m->getline(inSFF);
924                 string keyLength = m->getline(inSFF);
925                 int numFlows = parseHeaderLineToInt(inSFF);
926                 string flowgramCode = m->getline(inSFF);
927                 string flowChars = m->getline(inSFF);
928                 string keySequence = m->getline(inSFF);
929                 m->gobble(inSFF);
930                 
931                 string seqName;
932                 
933                 if (flow)       {       outFlow << numFlows << endl;    }
934                 
935                 for(int i=0;i<numReads;i++){
936                         
937                         //sanity check
938                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
939                         
940                         Header header;
941                         
942                         //parse read header
943                         inSFF >> seqName;
944                         seqName = seqName.substr(1);
945                         m->gobble(inSFF);
946                         header.name = seqName;
947                         
948                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
949                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
950                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
951                         m->gobble(inSFF);
952                                 
953                         string runName = parseHeaderLineToString(inSFF);
954                         string analysisName = parseHeaderLineToString(inSFF);
955                         string fullPath = parseHeaderLineToString(inSFF);
956                         m->gobble(inSFF);
957                         
958                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
959                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
960                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
961                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
962                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
963                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
964                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
965                         m->gobble(inSFF);
966                                 
967                         seqRead read;
968                         
969                         //parse read
970                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
971                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
972                         
973                         //adjust for print
974                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
975                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
976                         read.flowIndex = flowIndicesAdjusted;
977                         
978                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
979                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
980                         m->gobble(inSFF);
981                                         
982                         //if you have provided an accosfile and this seq is not in it, then dont print
983                         bool print = true;
984                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
985                         
986                         //print 
987                         if (print) {
988                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
989                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
990                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
991                         }
992                         
993                         //report progress
994                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
995                         
996                         if (m->control_pressed) {  break;  }
997                 }
998                 
999                 //report progress
1000                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
1001                 
1002                 inSFF.close();
1003                 
1004                 if (fasta)      {  outFasta.close();    }
1005                 if (qual)       {  outQual.close();             }
1006                 if (flow)       {  outFlow.close();             }
1007                 
1008                 return 0;
1009         }
1010         catch(exception& e) {
1011                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
1012                 exit(1);
1013         }
1014 }
1015 //**********************************************************************************************************************
1016
1017 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
1018         try {
1019                 int number;
1020                 
1021                 while (!file.eof())     {
1022                         
1023                         char c = file.get(); 
1024                         if (c == ':'){
1025                                 file >> number;
1026                                 break;
1027                         }
1028                         
1029                 }
1030                 m->gobble(file);
1031                 return number;
1032         }
1033         catch(exception& e) {
1034                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
1035                 exit(1);
1036         }
1037         
1038 }
1039
1040 //**********************************************************************************************************************
1041
1042 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
1043         try {
1044                 string text;
1045                 
1046                 while (!file.eof())     {
1047                         char c = file.get(); 
1048                         
1049                         if (c == ':'){
1050                                 //m->gobble(file);
1051                                 //text = m->getline(file);      
1052                                 file >> text;
1053                                 break;
1054                         }
1055                 }
1056                 m->gobble(file);
1057                 
1058                 return text;
1059         }
1060         catch(exception& e) {
1061                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1062                 exit(1);
1063         }
1064 }
1065
1066 //**********************************************************************************************************************
1067
1068 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1069         try {
1070                 vector<unsigned short> floatVector(length);
1071                 
1072                 while (!file.eof())     {
1073                         char c = file.get(); 
1074                         if (c == ':'){
1075                                 float temp;
1076                                 for(int i=0;i<length;i++){
1077                                         file >> temp;
1078                                         floatVector[i] = temp * 100;
1079                                 }
1080                                 break;
1081                         }
1082                 }
1083                 m->gobble(file);        
1084                 return floatVector;
1085         }
1086         catch(exception& e) {
1087                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1088                 exit(1);
1089         }
1090 }
1091
1092 //**********************************************************************************************************************
1093
1094 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1095         try {
1096                 vector<unsigned int> intVector(length);
1097                 
1098                 while (!file.eof())     {
1099                         char c = file.get(); 
1100                         if (c == ':'){
1101                                 for(int i=0;i<length;i++){
1102                                         file >> intVector[i];
1103                                 }
1104                                 break;
1105                         }
1106                 }
1107                 m->gobble(file);        
1108                 return intVector;
1109         }
1110         catch(exception& e) {
1111                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1112                 exit(1);
1113         }
1114 }
1115
1116 //**********************************************************************************************************************
1117
1118
1119                                 
1120