]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12
13 //**********************************************************************************************************************
14 vector<string> SffInfoCommand::setParameters(){ 
15         try {           
16                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
17                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
18                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt);
19                 CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow);
20                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
21                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
22                 CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SffInfoCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SffInfoCommand::getHelpString(){ 
37         try {
38                 string helpString = "";
39                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
40                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n";
41                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
42                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
43                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
44                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n";
45                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
46                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
47                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
48                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
49                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
50                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n";
51                 return helpString;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "SffInfoCommand", "getHelpString");
55                 exit(1);
56         }
57 }
58
59
60 //**********************************************************************************************************************
61 SffInfoCommand::SffInfoCommand(){       
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["fasta"] = tempOutNames;
67                 outputTypes["flow"] = tempOutNames;
68                 outputTypes["sfftxt"] = tempOutNames;
69                 outputTypes["qfile"] = tempOutNames;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
73                 exit(1);
74         }
75 }
76 //**********************************************************************************************************************
77
78 SffInfoCommand::SffInfoCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;   
81                 hasAccnos = false;
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 
86                 else {
87                         //valid paramters for this command
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string, string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["fasta"] = tempOutNames;
102                         outputTypes["flow"] = tempOutNames;
103                         outputTypes["sfftxt"] = tempOutNames;
104                         outputTypes["qfile"] = tempOutNames;
105                         
106                         //if the user changes the output directory command factory will send this info to us in the output parameter 
107                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
108                         
109                         //if the user changes the input directory command factory will send this info to us in the output parameter 
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
111
112                         sffFilename = validParameter.validFile(parameters, "sff", false);
113                         if (sffFilename == "not found") { sffFilename = "";  }
114                         else { 
115                                 m->splitAtDash(sffFilename, filenames);
116                                 
117                                 //go through files and make sure they are good, if not, then disregard them
118                                 for (int i = 0; i < filenames.size(); i++) {
119                                         if (inputDir != "") {
120                                                 string path = m->hasPath(filenames[i]);
121                                                 //if the user has not given a path then, add inputdir. else leave path alone.
122                                                 if (path == "") {       filenames[i] = inputDir + filenames[i];         }
123                                         }
124         
125                                         ifstream in;
126                                         int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
127                                 
128                                         //if you can't open it, try default location
129                                         if (ableToOpen == 1) {
130                                                 if (m->getDefaultPath() != "") { //default path is set
131                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
132                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
133                                                         ifstream in2;
134                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
135                                                         in2.close();
136                                                         filenames[i] = tryPath;
137                                                 }
138                                         }
139                                         
140                                         //if you can't open it, try default location
141                                         if (ableToOpen == 1) {
142                                                 if (m->getOutputDir() != "") { //default path is set
143                                                         string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
144                                                         m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
145                                                         ifstream in2;
146                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
147                                                         in2.close();
148                                                         filenames[i] = tryPath;
149                                                 }
150                                         }
151                                         
152                                         in.close();
153                                         
154                                         if (ableToOpen == 1) { 
155                                                 m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
156                                                 //erase from file list
157                                                 filenames.erase(filenames.begin()+i);
158                                                 i--;
159                                         }
160                                 }
161                                 
162                                 //make sure there is at least one valid file left
163                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
164                         }
165                         
166                         accnosName = validParameter.validFile(parameters, "accnos", false);
167                         if (accnosName == "not found") { accnosName = "";  }
168                         else { 
169                                 hasAccnos = true;
170                                 m->splitAtDash(accnosName, accnosFileNames);
171                                 
172                                 //go through files and make sure they are good, if not, then disregard them
173                                 for (int i = 0; i < accnosFileNames.size(); i++) {
174                                         if (inputDir != "") {
175                                                 string path = m->hasPath(accnosFileNames[i]);
176                                                 //if the user has not given a path then, add inputdir. else leave path alone.
177                                                 if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
178                                         }
179         
180                                         ifstream in;
181                                         int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
182                                 
183                                         //if you can't open it, try default location
184                                         if (ableToOpen == 1) {
185                                                 if (m->getDefaultPath() != "") { //default path is set
186                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
187                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188                                                         ifstream in2;
189                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190                                                         in2.close();
191                                                         accnosFileNames[i] = tryPath;
192                                                 }
193                                         }
194                                         //if you can't open it, try default location
195                                         if (ableToOpen == 1) {
196                                                 if (m->getOutputDir() != "") { //default path is set
197                                                         string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
198                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
199                                                         ifstream in2;
200                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
201                                                         in2.close();
202                                                         accnosFileNames[i] = tryPath;
203                                                 }
204                                         }
205                                         in.close();
206                                         
207                                         if (ableToOpen == 1) { 
208                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
209                                                 //erase from file list
210                                                 accnosFileNames.erase(accnosFileNames.begin()+i);
211                                                 i--;
212                                         }
213                                 }
214                                 
215                                 //make sure there is at least one valid file left
216                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
217                         }
218                         
219                         if (hasAccnos) {
220                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
221                         }
222                         
223                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
224                         qual = m->isTrue(temp); 
225                         
226                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
227                         fasta = m->isTrue(temp); 
228                         
229                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
230                         flow = m->isTrue(temp); 
231                         
232                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
233                         trim = m->isTrue(temp); 
234                         
235                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
236                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
237                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
238                         else {
239                                 //you are a filename
240                                 if (inputDir != "") {
241                                         map<string,string>::iterator it = parameters.find("sfftxt");
242                                         //user has given a template file
243                                         if(it != parameters.end()){ 
244                                                 string path = m->hasPath(it->second);
245                                                 //if the user has not given a path then, add inputdir. else leave path alone.
246                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
247                                         }
248                                 }
249                                 
250                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
251                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
252                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
253                         }
254                         
255                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  
256                                 //if there is a current fasta file, use it
257                                 string filename = m->getSFFFile(); 
258                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
259                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }
260                         }
261                 }
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
265                 exit(1);
266         }
267 }
268 //**********************************************************************************************************************
269 int SffInfoCommand::execute(){
270         try {
271                 
272                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
273                 
274                 for (int s = 0; s < filenames.size(); s++) {
275                         
276                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
277                         
278                         int start = time(NULL);
279                         
280                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
281                         
282                         string accnos = "";
283                         if (hasAccnos) { accnos = accnosFileNames[s]; }
284                         
285                         int numReads = extractSffInfo(filenames[s], accnos);
286
287                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
288                 }
289                 
290                 if (sfftxtFilename != "") {  parseSffTxt(); }
291                 
292                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
293                 
294                 //set fasta file as new current fastafile
295                 string current = "";
296                 itTypes = outputTypes.find("fasta");
297                 if (itTypes != outputTypes.end()) {
298                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
299                 }
300                 
301                 itTypes = outputTypes.find("qfile");
302                 if (itTypes != outputTypes.end()) {
303                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
304                 }       
305                 
306                 //report output filenames
307                 m->mothurOutEndLine();
308                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
309                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
310                 m->mothurOutEndLine();
311
312                 return 0;
313         }
314         catch(exception& e) {
315                 m->errorOut(e, "SffInfoCommand", "execute");
316                 exit(1);
317         }
318 }
319 //**********************************************************************************************************************
320 int SffInfoCommand::extractSffInfo(string input, string accnos){
321         try {
322                 
323                 if (outputDir == "") {  outputDir += m->hasPath(input); }
324                 
325                 if (accnos != "")       {  readAccnosFile(accnos);  }
326                 else                            {       seqNames.clear();               }
327
328                 ofstream outSfftxt, outFasta, outQual, outFlow;
329                 string outFastaFileName, outQualFileName;
330                 string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
331                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
332                 if (trim) {
333                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "fasta";
334                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "qual";
335                 }else{
336                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.fasta";
337                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw.qual";
338                 }
339                 
340                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
341                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
342                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
343                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
344                 
345                 ifstream in;
346                 in.open(input.c_str(), ios::binary);
347                 
348                 CommonHeader header; 
349                 readCommonHeader(in, header);
350         
351                 int count = 0;
352                 mycount = 0;
353                 
354                 //check magic number and version
355                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
356                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
357         
358                 //print common header
359                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
360                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
361                         
362                 //read through the sff file
363                 while (!in.eof()) {
364                         
365                         bool print = true;
366                         
367                         //read header
368                         Header readheader;
369                         readHeader(in, readheader);
370                         
371                         //read data
372                         seqRead read; 
373                         readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
374                                 
375                         //if you have provided an accosfile and this seq is not in it, then dont print
376                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
377                         
378                         //print 
379                         if (print) {
380                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
381                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
382                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
383                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
384                         }
385                         
386                         count++;
387                         mycount++;
388                 
389                         //report progress
390                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
391                 
392                         if (m->control_pressed) { count = 0; break;   }
393                         
394                         if (count >= header.numReads) { break; }
395                 }
396                 
397                 //report progress
398                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
399                 
400                 in.close();
401                 
402                 if (sfftxt) {  outSfftxt.close();       }
403                 if (fasta)      {  outFasta.close();    }
404                 if (qual)       {  outQual.close();             }
405                 if (flow)       {  outFlow.close();             }
406                 
407                 return count;
408         }
409         catch(exception& e) {
410                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
411                 exit(1);
412         }
413 }
414 //**********************************************************************************************************************
415 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
416         try {
417
418                 if (!in.eof()) {
419
420                         //read magic number
421                         char buffer[4];
422                         in.read(buffer, 4);
423                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
424                 
425                         //read version
426                         char buffer9[4];
427                         in.read(buffer9, 4);
428                         header.version = "";
429                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i])); }
430                                 
431                         //read offset
432                         char buffer2 [8];
433                         in.read(buffer2, 8);
434                         header.indexOffset =  be_int8(*(unsigned long int *)(&buffer2));
435                         
436                         //read index length
437                         char buffer3 [4];
438                         in.read(buffer3, 4);
439                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
440                         
441                         //read num reads
442                         char buffer4 [4];
443                         in.read(buffer4, 4);
444                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
445                                 
446                         //read header length
447                         char buffer5 [2];
448                         in.read(buffer5, 2);
449                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
450                                         
451                         //read key length
452                         char buffer6 [2];
453                         in.read(buffer6, 2);
454                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
455                         
456                         //read number of flow reads
457                         char buffer7 [2];
458                         in.read(buffer7, 2);
459                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
460                                 
461                         //read format code
462                         char buffer8 [1];
463                         in.read(buffer8, 1);
464                         header.flogramFormatCode = (int)(buffer8[0]);
465                         
466                         //read flow chars
467                         char* tempBuffer = new char[header.numFlowsPerRead];
468                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
469                         header.flowChars = tempBuffer;
470                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
471                         delete[] tempBuffer;
472                         
473                         //read key
474                         char* tempBuffer2 = new char[header.keyLength];
475                         in.read(&(*tempBuffer2), header.keyLength);
476                         header.keySequence = tempBuffer2;
477                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
478                         delete[] tempBuffer2;
479                                 
480                         /* Pad to 8 chars */
481                         unsigned long int spotInFile = in.tellg();
482                         unsigned long int spot = (spotInFile + 7)& ~7;  // ~ inverts
483                         in.seekg(spot);
484                         
485                 }else{
486                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
487                 }
488
489                 return 0;
490         }
491         catch(exception& e) {
492                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
493                 exit(1);
494         }
495 }
496 //**********************************************************************************************************************
497 int SffInfoCommand::readHeader(ifstream& in, Header& header){
498         try {
499         
500                 if (!in.eof()) {
501                         
502                         //read header length
503                         char buffer [2];
504                         in.read(buffer, 2);
505                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
506                                                 
507                         //read name length
508                         char buffer2 [2];
509                         in.read(buffer2, 2);
510                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
511
512                         //read num bases
513                         char buffer3 [4];
514                         in.read(buffer3, 4);
515                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
516                         
517                         //read clip qual left
518                         char buffer4 [2];
519                         in.read(buffer4, 2);
520                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
521                         header.clipQualLeft = 5; 
522                         
523                         //read clip qual right
524                         char buffer5 [2];
525                         in.read(buffer5, 2);
526                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
527                         
528                         //read clipAdapterLeft
529                         char buffer6 [2];
530                         in.read(buffer6, 2);
531                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
532
533                         //read clipAdapterRight
534                         char buffer7 [2];
535                         in.read(buffer7, 2);
536                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
537                 
538                         //read name
539                         char* tempBuffer = new char[header.nameLength];
540                         in.read(&(*tempBuffer), header.nameLength);
541                         header.name = tempBuffer;
542                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
543                         delete[] tempBuffer;
544                         
545                         //extract info from name
546                         decodeName(header.timestamp, header.region, header.xy, header.name);
547                         
548                         /* Pad to 8 chars */
549                         unsigned long int spotInFile = in.tellg();
550                         unsigned long int spot = (spotInFile + 7)& ~7;
551                         in.seekg(spot);
552                         
553                 }else{
554                         m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
555                 }
556
557                 return 0;
558         }
559         catch(exception& e) {
560                 m->errorOut(e, "SffInfoCommand", "readHeader");
561                 exit(1);
562         }
563 }
564 //**********************************************************************************************************************
565 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, int numBases){
566         try {
567         
568                 if (!in.eof()) {
569         
570                         //read flowgram
571                         read.flowgram.resize(numFlowReads);
572                         for (int i = 0; i < numFlowReads; i++) {  
573                                 char buffer [2];
574                                 in.read(buffer, 2);
575                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
576                         }
577         
578                         //read flowIndex
579                         read.flowIndex.resize(numBases);
580                         for (int i = 0; i < numBases; i++) {  
581                                 char temp[1];
582                                 in.read(temp, 1);
583                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
584                         }
585         
586                         //read bases
587                         char* tempBuffer = new char[numBases];
588                         in.read(&(*tempBuffer), numBases);
589                         read.bases = tempBuffer;
590                         if (read.bases.length() > numBases) { read.bases = read.bases.substr(0, numBases);  }
591                         delete[] tempBuffer;
592
593                         //read qual scores
594                         read.qualScores.resize(numBases);
595                         for (int i = 0; i < numBases; i++) {  
596                                 char temp[1];
597                                 in.read(temp, 1);
598                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
599                         }
600         
601                         /* Pad to 8 chars */
602                         unsigned long int spotInFile = in.tellg();
603                         unsigned long int spot = (spotInFile + 7)& ~7;
604                         in.seekg(spot);
605                         
606                 }else{
607                         m->mothurOut("Error reading."); m->mothurOutEndLine();
608                 }
609
610                 return 0;
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "SffInfoCommand", "readSeqData");
614                 exit(1);
615         }
616 }
617 //**********************************************************************************************************************
618 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
619         try {
620                 
621                 if (name.length() >= 6) {
622                         string time = name.substr(0, 6);
623                         unsigned int timeNum = m->fromBase36(time);
624                         
625                         int q1 = timeNum / 60;
626                         int sec = timeNum - 60 * q1;
627                         int q2 = q1 / 60;
628                         int minute = q1 - 60 * q2;
629                         int q3 = q2 / 24;
630                         int hr = q2 - 24 * q3;
631                         int q4 = q3 / 32;
632                         int day = q3 - 32 * q4;
633                         int q5 = q4 / 13;
634                         int mon = q4 - 13 * q5;
635                         int year = 2000 + q5;
636                 
637                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
638                 }
639                 
640                 if (name.length() >= 9) {
641                         region = name.substr(7, 2);
642                 
643                         string xyNum = name.substr(9);
644                         unsigned int myXy = m->fromBase36(xyNum);
645                         int x = myXy >> 12;
646                         int y = myXy & 4095;
647                 
648                         xy = toString(x) + "_" + toString(y);
649                 }
650                 
651                 return 0;
652         }
653         catch(exception& e) {
654                 m->errorOut(e, "SffInfoCommand", "decodeName");
655                 exit(1);
656         }
657 }
658 //**********************************************************************************************************************
659 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
660         try {
661         
662                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
663                 out << "Version: " << header.version << endl;
664                 out << "Index Offset: " << header.indexOffset << endl;
665                 out << "Index Length: " << header.indexLength << endl;
666                 out << "Number of Reads: " << header.numReads << endl;
667                 out << "Header Length: " << header.headerLength << endl;
668                 out << "Key Length: " << header.keyLength << endl;
669                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
670                 out << "Format Code: " << header.flogramFormatCode << endl;
671                 out << "Flow Chars: " << header.flowChars << endl;
672                 out << "Key Sequence: " << header.keySequence << endl << endl;
673                         
674                 return 0;
675         }
676         catch(exception& e) {
677                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
678                 exit(1);
679         }
680 }
681 //**********************************************************************************************************************
682 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
683         try {
684                 
685                 out << ">" << header.name << endl;
686                 out << "Run Prefix: " << header.timestamp << endl;
687                 out << "Region #:  " << header.region << endl;
688                 out << "XY Location: " << header.xy << endl << endl;
689                 
690                 out << "Run Name:  " << endl;
691                 out << "Analysis Name:  " << endl;
692                 out << "Full Path: " << endl << endl;
693                 
694                 out << "Read Header Len: " << header.headerLength << endl;
695                 out << "Name Length: " << header.nameLength << endl;
696                 out << "# of Bases: " << header.numBases << endl;
697                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
698                 out << "Clip Qual Right: " << header.clipQualRight << endl;
699                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
700                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
701                 
702                 return 0;
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "SffInfoCommand", "printHeader");
706                 exit(1);
707         }
708 }
709
710 //**********************************************************************************************************************
711 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
712         try {
713                 
714                 out << "Flowgram: ";
715                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
716                 
717                 out << endl <<  "Flow Indexes: ";
718                 int sum = 0;
719                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
720                 
721                 //make the bases you want to clip lowercase and the bases you want to keep upper case
722                 if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
723                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
724                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
725                 for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
726                 
727                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
728                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
729         
730                 
731                 out << endl << endl;
732                 
733                 return 0;
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
737                 exit(1);
738         }
739 }
740 //**********************************************************************************************************************
741 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
742         try {
743                 
744                 string seq = read.bases;
745                 
746                 if (trim) {
747                         if(header.clipQualRight < header.clipQualLeft){
748                                 seq = "NNNN";
749                         }
750                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
751                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
752                         }
753                         else {
754                                 seq = seq.substr(header.clipQualLeft-1);
755                         }
756                 }else{
757                         //if you wanted the sfftxt then you already converted the bases to the right case
758                         if (!sfftxt) {
759                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
760                                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
761                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
762                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
763                                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
764                         }
765                 }
766                 
767                 out << ">" << header.name  << " xy=" << header.xy << endl;
768                 out << seq << endl;
769                 
770                 return 0;
771         }
772         catch(exception& e) {
773                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
774                 exit(1);
775         }
776 }
777
778 //**********************************************************************************************************************
779 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
780         try {
781                 
782                 if (trim) {
783                         if(header.clipQualRight < header.clipQualLeft){
784                                 out << ">" << header.name << " xy=" << header.xy << endl;
785                                 out << "0\t0\t0\t0";
786                         }
787                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
788                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
789                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
790                         }
791                         else{
792                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
793                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
794                         }
795                 }else{
796                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
797                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
798                 }
799                 
800                 out << endl;
801                 
802                 return 0;
803         }
804         catch(exception& e) {
805                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
806                 exit(1);
807         }
808 }
809
810 //**********************************************************************************************************************
811 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
812         try {
813                 if(header.clipQualRight > header.clipQualLeft){
814                         
815                         int rightIndex = 0;
816                         for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
817
818                         out << header.name << ' ' << rightIndex;
819                         for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
820                         out << endl;
821                 }
822                 
823                 
824                 return 0;
825         }
826         catch(exception& e) {
827                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
828                 exit(1);
829         }
830 }
831 //**********************************************************************************************************************
832 int SffInfoCommand::readAccnosFile(string filename) {
833         try {
834                 //remove old names
835                 seqNames.clear();
836                 
837                 ifstream in;
838                 m->openInputFile(filename, in);
839                 string name;
840                 
841                 while(!in.eof()){
842                         in >> name; m->gobble(in);
843                                                 
844                         seqNames.insert(name);
845                         
846                         if (m->control_pressed) { seqNames.clear(); break; }
847                 }
848                 in.close();             
849                 
850                 return 0;
851         }
852         catch(exception& e) {
853                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
854                 exit(1);
855         }
856 }
857 //**********************************************************************************************************************
858 int SffInfoCommand::parseSffTxt() {
859         try {
860                 
861                 ifstream inSFF;
862                 m->openInputFile(sfftxtFilename, inSFF);
863                 
864                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
865                 
866                 //output file names
867                 ofstream outFasta, outQual, outFlow;
868                 string outFastaFileName, outQualFileName;
869                 string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
870                 if (trim) {
871                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
872                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
873                 }else{
874                         outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
875                         outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
876                 }
877                 
878                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
879                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
880                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
881                 
882                 //read common header
883                 string commonHeader = m->getline(inSFF);
884                 string magicNumber = m->getline(inSFF); 
885                 string version = m->getline(inSFF);
886                 string indexOffset = m->getline(inSFF);
887                 string indexLength = m->getline(inSFF);
888                 int numReads = parseHeaderLineToInt(inSFF);
889                 string headerLength = m->getline(inSFF);
890                 string keyLength = m->getline(inSFF);
891                 int numFlows = parseHeaderLineToInt(inSFF);
892                 string flowgramCode = m->getline(inSFF);
893                 string flowChars = m->getline(inSFF);
894                 string keySequence = m->getline(inSFF);
895                 m->gobble(inSFF);
896                 
897                 string seqName;
898                 
899                 if (flow)       {       outFlow << numFlows << endl;    }
900                 
901                 for(int i=0;i<numReads;i++){
902                         
903                         //sanity check
904                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
905                         
906                         Header header;
907                         
908                         //parse read header
909                         inSFF >> seqName;
910                         seqName = seqName.substr(1);
911                         m->gobble(inSFF);
912                         header.name = seqName;
913                         
914                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
915                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
916                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
917                         m->gobble(inSFF);
918                                 
919                         string runName = parseHeaderLineToString(inSFF);
920                         string analysisName = parseHeaderLineToString(inSFF);
921                         string fullPath = parseHeaderLineToString(inSFF);
922                         m->gobble(inSFF);
923                         
924                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
925                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
926                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
927                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
928                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
929                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
930                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
931                         m->gobble(inSFF);
932                                 
933                         seqRead read;
934                         
935                         //parse read
936                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
937                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
938                         
939                         //adjust for print
940                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
941                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
942                         read.flowIndex = flowIndicesAdjusted;
943                         
944                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
945                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
946                         m->gobble(inSFF);
947                                         
948                         //if you have provided an accosfile and this seq is not in it, then dont print
949                         bool print = true;
950                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
951                         
952                         //print 
953                         if (print) {
954                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
955                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
956                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
957                         }
958                         
959                         //report progress
960                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
961                         
962                         if (m->control_pressed) {  break;  }
963                 }
964                 
965                 //report progress
966                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
967                 
968                 inSFF.close();
969                 
970                 if (fasta)      {  outFasta.close();    }
971                 if (qual)       {  outQual.close();             }
972                 if (flow)       {  outFlow.close();             }
973                 
974                 return 0;
975         }
976         catch(exception& e) {
977                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
978                 exit(1);
979         }
980 }
981 //**********************************************************************************************************************
982
983 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
984         try {
985                 int number;
986                 
987                 while (!file.eof())     {
988                         
989                         char c = file.get(); 
990                         if (c == ':'){
991                                 file >> number;
992                                 break;
993                         }
994                         
995                 }
996                 m->gobble(file);
997                 return number;
998         }
999         catch(exception& e) {
1000                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
1001                 exit(1);
1002         }
1003         
1004 }
1005
1006 //**********************************************************************************************************************
1007
1008 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
1009         try {
1010                 string text;
1011                 
1012                 while (!file.eof())     {
1013                         char c = file.get(); 
1014                         
1015                         if (c == ':'){
1016                                 //m->gobble(file);
1017                                 //text = m->getline(file);      
1018                                 file >> text;
1019                                 break;
1020                         }
1021                 }
1022                 m->gobble(file);
1023                 
1024                 return text;
1025         }
1026         catch(exception& e) {
1027                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1028                 exit(1);
1029         }
1030 }
1031
1032 //**********************************************************************************************************************
1033
1034 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1035         try {
1036                 vector<unsigned short> floatVector(length);
1037                 
1038                 while (!file.eof())     {
1039                         char c = file.get(); 
1040                         if (c == ':'){
1041                                 float temp;
1042                                 for(int i=0;i<length;i++){
1043                                         file >> temp;
1044                                         floatVector[i] = temp * 100;
1045                                 }
1046                                 break;
1047                         }
1048                 }
1049                 m->gobble(file);        
1050                 return floatVector;
1051         }
1052         catch(exception& e) {
1053                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1054                 exit(1);
1055         }
1056 }
1057
1058 //**********************************************************************************************************************
1059
1060 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1061         try {
1062                 vector<unsigned int> intVector(length);
1063                 
1064                 while (!file.eof())     {
1065                         char c = file.get(); 
1066                         if (c == ':'){
1067                                 for(int i=0;i<length;i++){
1068                                         file >> intVector[i];
1069                                 }
1070                                 break;
1071                         }
1072                 }
1073                 m->gobble(file);        
1074                 return intVector;
1075         }
1076         catch(exception& e) {
1077                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1078                 exit(1);
1079         }
1080 }
1081
1082 //**********************************************************************************************************************
1083
1084
1085                                 
1086