]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
changes while testing
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12 #include "trimoligos.h"
13 #include "sequence.hpp"
14 #include "qualityscores.h"
15
16 //**********************************************************************************************************************
17 vector<string> SffInfoCommand::setParameters(){ 
18         try {           
19                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);
20         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);
21                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
22                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);
23                 CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);
24                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
25                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
26                 CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);
27         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
28                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
29         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
30                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
31         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
32                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
33                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
34                 
35                 vector<string> myArray;
36                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
37                 return myArray;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "SffInfoCommand", "setParameters");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 string SffInfoCommand::getHelpString(){ 
46         try {
47                 string helpString = "";
48                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
49                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";
50                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
51                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
52                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
53         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";
54         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
55                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
56                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
57         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
58                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
59                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";
60                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
61                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
62                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
63                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
64                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
65                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";
66                 return helpString;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "SffInfoCommand", "getHelpString");
70                 exit(1);
71         }
72 }
73
74 //**********************************************************************************************************************
75 string SffInfoCommand::getOutputPattern(string type) {
76     try {
77         string pattern = "";
78         
79         if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }
80         else if (type == "flow")    {   pattern =  "[filename],flow";   }
81         else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }
82         else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }
83         else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }
84         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
85         
86         return pattern;
87     }
88     catch(exception& e) {
89         m->errorOut(e, "SffInfoCommand", "getOutputPattern");
90         exit(1);
91     }
92 }
93 //**********************************************************************************************************************
94 SffInfoCommand::SffInfoCommand(){       
95         try {
96                 abort = true; calledHelp = true; 
97                 setParameters();
98                 vector<string> tempOutNames;
99                 outputTypes["fasta"] = tempOutNames;
100                 outputTypes["flow"] = tempOutNames;
101                 outputTypes["sfftxt"] = tempOutNames;
102                 outputTypes["qfile"] = tempOutNames;
103         outputTypes["sff"] = tempOutNames;
104         }
105         catch(exception& e) {
106                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
107                 exit(1);
108         }
109 }
110 //**********************************************************************************************************************
111
112 SffInfoCommand::SffInfoCommand(string option)  {
113         try {
114                 abort = false; calledHelp = false;   
115                 hasAccnos = false; hasOligos = false;
116         split = 1;
117                 
118                 //allow user to run help
119                 if(option == "help") { help(); abort = true; calledHelp = true; }
120                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
121                 
122                 else {
123                         //valid paramters for this command
124                         vector<string> myArray = setParameters();
125                         
126                         OptionParser parser(option);
127                         map<string, string> parameters = parser.getParameters();
128                         
129                         ValidParameters validParameter;
130                         //check to make sure all parameters are valid for command
131                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         //initialize outputTypes
136                         vector<string> tempOutNames;
137                         outputTypes["fasta"] = tempOutNames;
138                         outputTypes["flow"] = tempOutNames;
139                         outputTypes["sfftxt"] = tempOutNames;
140                         outputTypes["qfile"] = tempOutNames;
141             outputTypes["sff"] = tempOutNames;
142                         
143                         //if the user changes the output directory command factory will send this info to us in the output parameter 
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
148
149                         sffFilename = validParameter.validFile(parameters, "sff", false);
150                         if (sffFilename == "not found") { sffFilename = "";  }
151                         else { 
152                                 m->splitAtDash(sffFilename, filenames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < filenames.size(); i++) {
156                                         bool ignore = false;
157                                         if (filenames[i] == "current") { 
158                                                 filenames[i] = m->getSFFFile(); 
159                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }
160                                                 else {  
161                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
162                                                         //erase from file list
163                                                         filenames.erase(filenames.begin()+i);
164                                                         i--;
165                                                 }
166                                         }
167                                         
168                                         if (!ignore) {
169                                                 if (inputDir != "") {
170                                                         string path = m->hasPath(filenames[i]);
171                                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }
173                                                 }
174                 
175                                                 ifstream in;
176                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
177                                         
178                                                 //if you can't open it, try default location
179                                                 if (ableToOpen == 1) {
180                                                         if (m->getDefaultPath() != "") { //default path is set
181                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
182                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
183                                                                 ifstream in2;
184                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
185                                                                 in2.close();
186                                                                 filenames[i] = tryPath;
187                                                         }
188                                                 }
189                                                 
190                                                 //if you can't open it, try default location
191                                                 if (ableToOpen == 1) {
192                                                         if (m->getOutputDir() != "") { //default path is set
193                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
194                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
195                                                                 ifstream in2;
196                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
197                                                                 in2.close();
198                                                                 filenames[i] = tryPath;
199                                                         }
200                                                 }
201                                                 
202                                                 in.close();
203                                                 
204                                                 if (ableToOpen == 1) { 
205                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
206                                                         //erase from file list
207                                                         filenames.erase(filenames.begin()+i);
208                                                         i--;
209                                                 }else { m->setSFFFile(filenames[i]); }
210                                         }
211                                 }
212                                 
213                                 //make sure there is at least one valid file left
214                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
215                         }
216                         
217                         accnosName = validParameter.validFile(parameters, "accnos", false);
218                         if (accnosName == "not found") { accnosName = "";  }
219                         else { 
220                                 hasAccnos = true;
221                                 m->splitAtDash(accnosName, accnosFileNames);
222                                 
223                                 //go through files and make sure they are good, if not, then disregard them
224                                 for (int i = 0; i < accnosFileNames.size(); i++) {
225                                         bool ignore = false;
226                                         if (accnosFileNames[i] == "current") { 
227                                                 accnosFileNames[i] = m->getAccnosFile(); 
228                                                 if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
229                                                 else {  
230                                                         m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
231                                                         //erase from file list
232                                                         accnosFileNames.erase(accnosFileNames.begin()+i);
233                                                         i--;
234                                                 }
235                                         }
236                                         
237                                         if (!ignore) {
238                                         
239                                                 if (inputDir != "") {
240                                                         string path = m->hasPath(accnosFileNames[i]);
241                                                         //if the user has not given a path then, add inputdir. else leave path alone.
242                                                         if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
243                                                 }
244                 
245                                                 ifstream in;
246                                                 int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
247                                         
248                                                 //if you can't open it, try default location
249                                                 if (ableToOpen == 1) {
250                                                         if (m->getDefaultPath() != "") { //default path is set
251                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
252                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
253                                                                 ifstream in2;
254                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
255                                                                 in2.close();
256                                                                 accnosFileNames[i] = tryPath;
257                                                         }
258                                                 }
259                                                 //if you can't open it, try default location
260                                                 if (ableToOpen == 1) {
261                                                         if (m->getOutputDir() != "") { //default path is set
262                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
263                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
264                                                                 ifstream in2;
265                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
266                                                                 in2.close();
267                                                                 accnosFileNames[i] = tryPath;
268                                                         }
269                                                 }
270                                                 in.close();
271                                                 
272                                                 if (ableToOpen == 1) { 
273                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
274                                                         //erase from file list
275                                                         accnosFileNames.erase(accnosFileNames.begin()+i);
276                                                         i--;
277                                                 }
278                                         }
279                                 }
280                                 
281                                 //make sure there is at least one valid file left
282                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
283                         }
284             
285             oligosfile = validParameter.validFile(parameters, "oligos", false);
286                         if (oligosfile == "not found") { oligosfile = "";  }
287                         else { 
288                                 hasOligos = true;
289                                 m->splitAtDash(oligosfile, oligosFileNames);
290                                 
291                                 //go through files and make sure they are good, if not, then disregard them
292                                 for (int i = 0; i < oligosFileNames.size(); i++) {
293                                         bool ignore = false;
294                                         if (oligosFileNames[i] == "current") { 
295                                                 oligosFileNames[i] = m->getOligosFile(); 
296                                                 if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
297                                                 else {  
298                                                         m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
299                                                         //erase from file list
300                                                         oligosFileNames.erase(oligosFileNames.begin()+i);
301                                                         i--;
302                                                 }
303                                         }
304                                         
305                                         if (!ignore) {
306                         
307                                                 if (inputDir != "") {
308                                                         string path = m->hasPath(oligosFileNames[i]);
309                                                         //if the user has not given a path then, add inputdir. else leave path alone.
310                                                         if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }
311                                                 }
312                         
313                                                 ifstream in;
314                                                 int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");
315                         
316                                                 //if you can't open it, try default location
317                                                 if (ableToOpen == 1) {
318                                                         if (m->getDefaultPath() != "") { //default path is set
319                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);
320                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
321                                                                 ifstream in2;
322                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
323                                                                 in2.close();
324                                                                 oligosFileNames[i] = tryPath;
325                                                         }
326                                                 }
327                                                 //if you can't open it, try default location
328                                                 if (ableToOpen == 1) {
329                                                         if (m->getOutputDir() != "") { //default path is set
330                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);
331                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
332                                                                 ifstream in2;
333                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
334                                                                 in2.close();
335                                                                 oligosFileNames[i] = tryPath;
336                                                         }
337                                                 }
338                                                 in.close();
339                                                 
340                                                 if (ableToOpen == 1) { 
341                                                         m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
342                                                         //erase from file list
343                                                         oligosFileNames.erase(oligosFileNames.begin()+i);
344                                                         i--;
345                                                 }
346                                         }
347                                 }
348                                 
349                                 //make sure there is at least one valid file left
350                                 if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }
351                         }
352
353                         if (hasOligos) {
354                 split = 2;
355                                 if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }
356                         }
357             
358                         if (hasAccnos) {
359                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
360                         }
361                         
362                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
363                         qual = m->isTrue(temp); 
364                         
365                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
366                         fasta = m->isTrue(temp); 
367                         
368                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }
369                         flow = m->isTrue(temp); 
370                         
371                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
372                         trim = m->isTrue(temp); 
373             
374             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
375                         m->mothurConvert(temp, bdiffs);
376                         
377                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
378                         m->mothurConvert(temp, pdiffs);
379             
380             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
381                         m->mothurConvert(temp, ldiffs);
382             
383             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
384                         m->mothurConvert(temp, sdiffs);
385                         
386                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
387                         m->mothurConvert(temp, tdiffs);
388                         
389                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
390             
391                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
392                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
393                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
394                         else {
395                                 //you are a filename
396                                 if (inputDir != "") {
397                                         map<string,string>::iterator it = parameters.find("sfftxt");
398                                         //user has given a template file
399                                         if(it != parameters.end()){ 
400                                                 string path = m->hasPath(it->second);
401                                                 //if the user has not given a path then, add inputdir. else leave path alone.
402                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
403                                         }
404                                 }
405                                 
406                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
407                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
408                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
409                         }
410                         
411                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  
412                                 //if there is a current sff file, use it
413                                 string filename = m->getSFFFile(); 
414                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
415                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }
416                         }
417             
418             
419                 }
420         }
421         catch(exception& e) {
422                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
423                 exit(1);
424         }
425 }
426 //**********************************************************************************************************************
427 int SffInfoCommand::execute(){
428         try {
429                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
430                 
431                 for (int s = 0; s < filenames.size(); s++) {
432                         
433                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
434                         
435                         int start = time(NULL);
436                         
437             filenames[s] = m->getFullPathName(filenames[s]);
438                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
439                         
440                         string accnos = "";
441                         if (hasAccnos) { accnos = accnosFileNames[s]; }
442             
443             string oligos = "";
444             if (hasOligos) { oligos = oligosFileNames[s]; }
445                         
446                         int numReads = extractSffInfo(filenames[s], accnos, oligos);
447
448                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
449                 }
450                 
451                 if (sfftxtFilename != "") {  parseSffTxt(); }
452                 
453                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
454                 
455                 //set fasta file as new current fastafile
456                 string current = "";
457                 itTypes = outputTypes.find("fasta");
458                 if (itTypes != outputTypes.end()) {
459                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
460                 }
461                 
462                 itTypes = outputTypes.find("qfile");
463                 if (itTypes != outputTypes.end()) {
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
465                 }
466                 
467                 itTypes = outputTypes.find("flow");
468                 if (itTypes != outputTypes.end()) {
469                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }
470                 }
471                 
472                 //report output filenames
473                 m->mothurOutEndLine();
474                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
475                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
476                 m->mothurOutEndLine();
477
478                 return 0;
479         }
480         catch(exception& e) {
481                 m->errorOut(e, "SffInfoCommand", "execute");
482                 exit(1);
483         }
484 }
485 //**********************************************************************************************************************
486 int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
487         try {
488                 currentFileName = input;
489                 if (outputDir == "") {  outputDir += m->hasPath(input); }
490                 
491                 if (accnos != "")       {  readAccnosFile(accnos);  }
492                 else                            {       seqNames.clear();               }
493          
494         if (oligos != "")   {   readOligos(oligos);  split = 2;   }
495
496                 ofstream outSfftxt, outFasta, outQual, outFlow;
497                 string outFastaFileName, outQualFileName;
498         string rootName = outputDir + m->getRootName(m->getSimpleName(input));
499         if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
500         
501         map<string, string> variables; 
502                 variables["[filename]"] = rootName;
503                 string sfftxtFileName = getOutputFileName("sfftxt",variables);
504                 string outFlowFileName = getOutputFileName("flow",variables);
505                 if (!trim) { variables["[tag]"] = "raw"; }
506                 outFastaFileName = getOutputFileName("fasta",variables);
507         outQualFileName = getOutputFileName("qfile",variables);
508         
509                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
510                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
511                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
512                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
513                 
514                 ifstream in;
515                 in.open(input.c_str(), ios::binary);
516                 
517                 CommonHeader header; 
518                 readCommonHeader(in, header);
519         
520                 int count = 0;
521                 mycount = 0;
522                 
523                 //check magic number and version
524                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
525                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
526         
527                 //print common header
528                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
529                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
530                         
531                 //read through the sff file
532                 while (!in.eof()) {
533                         
534                         bool print = true;
535                                                 
536                         //read data
537                         seqRead read;  Header readheader;
538                         readSeqData(in, read, header.numFlowsPerRead, readheader);
539             bool okay = sanityCheck(readheader, read);
540             if (!okay) { break; }
541             
542                         //if you have provided an accosfile and this seq is not in it, then dont print
543                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
544                         
545                         //print 
546                         if (print) {
547                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
548                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
549                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
550                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
551                         }
552                         
553                         count++;
554                         mycount++;
555         
556                         //report progress
557                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
558                 
559                         if (m->control_pressed) { count = 0; break;   }
560                         
561                         if (count >= header.numReads) { break; }
562             //if (count >= 100) { break; }
563                 }
564                 
565                 //report progress
566                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
567                 
568                 in.close();
569                 
570                 if (sfftxt) {  outSfftxt.close();       }
571                 if (fasta)      {  outFasta.close();    }
572                 if (qual)       {  outQual.close();             }
573                 if (flow)       {  outFlow.close();             }
574                 
575         if (split > 1) {
576             //create new common headers for each file with the correct number of reads
577             adjustCommonHeader(header);
578             
579                         map<string, string>::iterator it;
580                         set<string> namesToRemove;
581                         for(int i=0;i<filehandles.size();i++){
582                                 for(int j=0;j<filehandles[0].size();j++){
583                                         if (filehandles[i][j] != "") {
584                                                 if (namesToRemove.count(filehandles[i][j]) == 0) {
585                                                         if(m->isBlank(filehandles[i][j])){
586                                                                 m->mothurRemove(filehandles[i][j]);
587                                 m->mothurRemove(filehandlesHeaders[i][j]);
588                                                                 namesToRemove.insert(filehandles[i][j]);
589                             }
590                                                 }
591                                         }
592                                 }
593                         }
594             
595             //append new header to reads
596             for (int i = 0; i < filehandles.size(); i++) {
597                 for (int j = 0; j < filehandles[i].size(); j++) {
598                     m->appendFiles(filehandles[i][j], filehandlesHeaders[i][j]);
599                     m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);
600                     m->mothurRemove(filehandlesHeaders[i][j]);
601                     if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }
602                 }
603             }
604                         
605                         //remove names for outputFileNames, just cleans up the output
606                         for(int i = 0; i < outputNames.size(); i++) { 
607                 if (namesToRemove.count(outputNames[i]) != 0) { 
608                     outputNames.erase(outputNames.begin()+i);
609                     i--;
610                 } 
611             }
612             
613             if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }
614             else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }
615         }
616         
617                 return count;
618         }
619         catch(exception& e) {
620                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
621                 exit(1);
622         }
623 }
624 //**********************************************************************************************************************
625 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
626         try {
627         
628                 if (!in.eof()) {
629
630                         //read magic number
631                         char buffer[4];
632                         in.read(buffer, 4);
633                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
634             
635                         //read version
636                         char buffer9[4];
637                         in.read(buffer9, 4);
638                         header.version = "";
639                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }
640     
641                         //read offset
642                         char buffer2 [8];
643                         in.read(buffer2, 8);
644                         header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));
645                         
646                         //read index length
647                         char buffer3 [4];
648                         in.read(buffer3, 4);
649                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
650             
651                         //read num reads
652                         char buffer4 [4];
653                         in.read(buffer4, 4);
654                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
655             
656             if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }
657                                 
658                         //read header length
659                         char buffer5 [2];
660                         in.read(buffer5, 2);
661                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
662                                         
663                         //read key length
664                         char buffer6 [2];
665                         in.read(buffer6, 2);
666                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
667                         
668                         //read number of flow reads
669                         char buffer7 [2];
670                         in.read(buffer7, 2);
671                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
672                                 
673                         //read format code
674                         char buffer8 [1];
675                         in.read(buffer8, 1);
676                         header.flogramFormatCode = (int)(buffer8[0]);
677                         
678                         //read flow chars
679                         char* tempBuffer = new char[header.numFlowsPerRead];
680                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
681                         header.flowChars = tempBuffer;
682                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
683                         delete[] tempBuffer;
684                         
685                         //read key
686                         char* tempBuffer2 = new char[header.keyLength];
687                         in.read(&(*tempBuffer2), header.keyLength);
688                         header.keySequence = tempBuffer2;
689                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
690                         delete[] tempBuffer2;
691                         
692                         /* Pad to 8 chars */
693                         unsigned long long spotInFile = in.tellg();
694                         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
695                         in.seekg(spot);
696             
697         }else{
698                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
699                 }
700         
701                 return 0;
702         
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
706                 exit(1);
707         }
708 }
709 //**********************************************************************************************************************
710 int SffInfoCommand::adjustCommonHeader(CommonHeader header){
711         try {
712
713         char* mybuffer = new char[4];
714         ifstream in;
715         in.open(currentFileName.c_str(), ios::binary);
716         
717         //magic number
718         in.read(mybuffer,4);
719         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
720             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
721                 ofstream out;
722                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
723                 out.write(mybuffer, in.gcount()); 
724                 out.close();
725             }
726         }
727         delete[] mybuffer;
728         
729         //version
730         mybuffer = new char[4];
731         in.read(mybuffer,4);
732         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
733             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
734                 ofstream out;
735                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
736                 out.write(mybuffer, in.gcount()); 
737                 out.close();
738             }
739         }
740         delete[] mybuffer;
741         
742         //offset
743         mybuffer = new char[8];
744         in.read(mybuffer,8);
745         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
746             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
747                 unsigned long long offset = 0;
748                 char* thisbuffer = new char[8];
749                 thisbuffer[0] = (offset >> 56) & 0xFF;
750                 thisbuffer[1] = (offset >> 48) & 0xFF;
751                 thisbuffer[2] = (offset >> 40) & 0xFF;
752                 thisbuffer[3] = (offset >> 32) & 0xFF;
753                 thisbuffer[4] = (offset >> 24) & 0xFF;
754                 thisbuffer[5] = (offset >> 16) & 0xFF;
755                 thisbuffer[6] = (offset >> 8) & 0xFF;
756                 thisbuffer[7] = offset & 0xFF;
757                 ofstream out;
758                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
759                 out.write(thisbuffer, 8);
760                 out.close();
761             }
762         }
763         delete[] mybuffer;
764             
765                         
766         //read index length
767                 mybuffer = new char[4];
768         in.read(mybuffer,4);
769         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
770             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
771                 ofstream out;
772                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
773                 unsigned int offset = 0;
774                 char* thisbuffer = new char[4];
775                 thisbuffer[0] = (offset >> 24) & 0xFF;
776                 thisbuffer[1] = (offset >> 16) & 0xFF;
777                 thisbuffer[2] = (offset >> 8) & 0xFF;
778                 thisbuffer[3] = offset & 0xFF;
779                 out.write(thisbuffer, 4);
780                 out.close();
781             }
782         }
783         delete[] mybuffer;
784                 
785         //change num reads
786         mybuffer = new char[4];
787         in.read(mybuffer,4);
788         delete[] mybuffer;
789         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
790             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
791                 ofstream out;
792                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
793                 //convert number of reads to 4 byte char*
794                 char* thisbuffer = new char[4];
795                 if ((m->findEdianness()) == "BIG_ENDIAN") {
796                     thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;
797                     thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;
798                     thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;
799                     thisbuffer[3] = numSplitReads[i][j] & 0xFF;
800                 }else {
801                     thisbuffer[0] = numSplitReads[i][j] & 0xFF;
802                     thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF;
803                     thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;
804                     thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;
805                  }
806                 out.write(thisbuffer, 4);
807                 out.close();
808                 delete[] thisbuffer;
809             }
810         }
811             
812         //read header length
813         mybuffer = new char[2];
814         in.read(mybuffer,2);
815         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
816             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
817                 ofstream out;
818                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
819                 out.write(mybuffer, in.gcount()); 
820                 out.close();
821             }
822         }
823         delete[] mybuffer;
824             
825         //read key length
826         mybuffer = new char[2];
827         in.read(mybuffer,2);
828         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
829             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
830                 ofstream out;
831                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
832                 out.write(mybuffer, in.gcount()); 
833                 out.close();
834             }
835         }
836         delete[] mybuffer;
837                         
838         //read number of flow reads
839         mybuffer = new char[2];
840         in.read(mybuffer,2);
841         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
842             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
843                 ofstream out;
844                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
845                 out.write(mybuffer, in.gcount()); 
846                 out.close();
847             }
848         }
849         delete[] mybuffer;
850             
851         //read format code
852         mybuffer = new char[1];
853         in.read(mybuffer,1);
854         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
855             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
856                 ofstream out;
857                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
858                 out.write(mybuffer, in.gcount()); 
859                 out.close();
860             }
861         }
862         delete[] mybuffer;
863                         
864         //read flow chars
865         mybuffer = new char[header.numFlowsPerRead];
866         in.read(mybuffer,header.numFlowsPerRead);
867         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
868             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
869                 ofstream out;
870                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
871                 out.write(mybuffer, in.gcount()); 
872                 out.close();
873             }
874         }
875         delete[] mybuffer;
876                         
877         //read key
878         mybuffer = new char[header.keyLength];
879         in.read(mybuffer,header.keyLength);
880         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
881             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
882                 ofstream out;
883                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
884                 out.write(mybuffer, in.gcount()); 
885                 out.close();
886             }
887         }
888         delete[] mybuffer;
889         
890                         
891         /* Pad to 8 chars */
892         unsigned long long spotInFile = in.tellg();
893         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
894         in.seekg(spot);
895         
896         mybuffer = new char[spot-spotInFile];
897         for (int i = 0; i < filehandlesHeaders.size(); i++) { 
898             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
899                 ofstream out;
900                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
901                 out.write(mybuffer, spot-spotInFile); 
902                 out.close();
903             }
904         }
905         delete[] mybuffer;
906         in.close();
907                 return 0;
908         
909         }
910         catch(exception& e) {
911                 m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");
912                 exit(1);
913         }
914 }
915 //**********************************************************************************************************************
916 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){
917         try {
918         unsigned long long startSpotInFile = in.tellg();
919                 if (!in.eof()) {
920             
921             /*****************************************/
922             //read header
923             
924             //read header length
925                         char buffer [2];
926                         in.read(buffer, 2); 
927                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
928             
929                         //read name length
930                         char buffer2 [2];
931                         in.read(buffer2, 2);
932                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
933             
934                         //read num bases
935                         char buffer3 [4];
936                         in.read(buffer3, 4);
937                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
938             
939                         
940                         //read clip qual left
941                         char buffer4 [2];
942                         in.read(buffer4, 2);
943                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
944                         header.clipQualLeft = 5;
945             
946                         
947                         //read clip qual right
948                         char buffer5 [2];
949                         in.read(buffer5, 2);
950                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
951            
952             
953                         //read clipAdapterLeft
954                         char buffer6 [2];
955                         in.read(buffer6, 2);
956                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
957             
958             
959                         //read clipAdapterRight
960                         char buffer7 [2];
961                         in.read(buffer7, 2);
962                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
963             
964             
965                         //read name
966                         char* tempBuffer = new char[header.nameLength];
967                         in.read(&(*tempBuffer), header.nameLength);
968                         header.name = tempBuffer;
969                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
970             
971                         delete[] tempBuffer;
972                         
973                         //extract info from name
974                         decodeName(header.timestamp, header.region, header.xy, header.name);
975                         
976                         /* Pad to 8 chars */
977                         unsigned long long spotInFile = in.tellg();
978                         unsigned long long spot = (spotInFile + 7)& ~7;
979                         in.seekg(spot);
980
981             /*****************************************/
982             //sequence read 
983             
984                         //read flowgram
985                         read.flowgram.resize(numFlowReads);
986                         for (int i = 0; i < numFlowReads; i++) {  
987                                 char buffer [2];
988                                 in.read(buffer, 2);
989                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
990                         }
991             
992                         //read flowIndex
993                         read.flowIndex.resize(header.numBases);
994                         for (int i = 0; i < header.numBases; i++) {  
995                                 char temp[1];
996                                 in.read(temp, 1);
997                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
998                         }
999         
1000                         //read bases
1001                         char* tempBuffer6 = new char[header.numBases];
1002                         in.read(&(*tempBuffer6), header.numBases);
1003                         read.bases = tempBuffer6;
1004                         if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }
1005                         delete[] tempBuffer6;
1006
1007                         //read qual scores
1008                         read.qualScores.resize(header.numBases);
1009                         for (int i = 0; i < header.numBases; i++) {  
1010                                 char temp[1];
1011                                 in.read(temp, 1);
1012                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
1013                         }
1014         
1015                         /* Pad to 8 chars */
1016                         spotInFile = in.tellg();
1017                         spot = (spotInFile + 7)& ~7;
1018                         in.seekg(spot);
1019             
1020             if (split > 1) {
1021                 char * mybuffer;
1022                 mybuffer = new char [spot-startSpotInFile];
1023                 ifstream in2;
1024                 in2.open(currentFileName.c_str(), ios::binary);
1025                 in2.seekg(startSpotInFile);
1026                 in2.read(mybuffer,spot-startSpotInFile);
1027                 in2.close();
1028                 
1029                 int barcodeIndex, primerIndex;
1030                 int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);
1031                                 
1032                 if(trashCodeLength == 0){
1033                     //cout << header.name << " length = " << spot << '\t' << startSpotInFile << '\t' << in2.gcount() << endl;
1034                     
1035                     ofstream out;
1036                     m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);
1037                     out.write(mybuffer, in2.gcount()); 
1038                     out.close();
1039                     numSplitReads[barcodeIndex][primerIndex]++;
1040                                 }
1041                                 else{
1042                                         ofstream out;
1043                     m->openOutputFileBinaryAppend(noMatchFile, out);
1044                     out.write(mybuffer, in2.gcount()); 
1045                     out.close();
1046                                 }
1047                                 delete[] mybuffer;
1048                         }
1049                 }else{
1050                         m->mothurOut("Error reading."); m->mothurOutEndLine();
1051                 }
1052
1053                 return 0;
1054         }
1055         catch(exception& e) {
1056                 m->errorOut(e, "SffInfoCommand", "readSeqData");
1057                 exit(1);
1058         }
1059 }
1060 //**********************************************************************************************************************
1061 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {
1062         try {
1063         //find group read belongs to
1064         TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
1065         
1066         int success = 1;
1067         string trashCode = "";
1068         int currentSeqsDiffs = 0;
1069         
1070         string seq = read.bases;
1071         
1072         if (trim) {
1073             if(header.clipQualRight < header.clipQualLeft){
1074                 if (header.clipQualRight == 0) { //don't trim right
1075                     seq = seq.substr(header.clipQualLeft-1);
1076                 }else {
1077                     seq = "NNNN";
1078                 }
1079             }
1080             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
1081                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
1082             }
1083             else {
1084                 seq = seq.substr(header.clipQualLeft-1);
1085             }
1086         }else{
1087             //if you wanted the sfftxt then you already converted the bases to the right case
1088             if (!sfftxt) {
1089                 int endValue = header.clipQualRight;
1090                 //make the bases you want to clip lowercase and the bases you want to keep upper case
1091                 if(endValue == 0){      endValue = seq.length();        }
1092                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
1093                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }
1094                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
1095             }
1096         }
1097         
1098         Sequence currSeq(header.name, seq);
1099         QualityScores currQual;
1100         
1101         if(numLinkers != 0){
1102             success = trimOligos.stripLinker(currSeq, currQual);
1103             if(success > ldiffs)                {       trashCode += 'k';       }
1104             else{ currentSeqsDiffs += success;  }
1105             
1106         }
1107         
1108         if(barcodes.size() != 0){
1109             success = trimOligos.stripBarcode(currSeq, currQual, barcode);
1110             if(success > bdiffs)                {       trashCode += 'b';       }
1111             else{ currentSeqsDiffs += success;  }
1112         }
1113         
1114         if(numSpacers != 0){
1115             success = trimOligos.stripSpacer(currSeq, currQual);
1116             if(success > sdiffs)                {       trashCode += 's';       }
1117             else{ currentSeqsDiffs += success;  }
1118             
1119         }
1120         
1121         if(numFPrimers != 0){
1122             success = trimOligos.stripForward(currSeq, currQual, primer, true);
1123             if(success > pdiffs)                {       trashCode += 'f';       }
1124             else{ currentSeqsDiffs += success;  }
1125         }
1126         
1127         if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
1128         
1129         if(revPrimer.size() != 0){
1130             success = trimOligos.stripReverse(currSeq, currQual);
1131             if(!success)                                {       trashCode += 'r';       }
1132         }
1133
1134         
1135         return trashCode.length();
1136     }
1137         catch(exception& e) {
1138                 m->errorOut(e, "SffInfoCommand", "findGroup");
1139                 exit(1);
1140         }
1141 }     
1142 //**********************************************************************************************************************
1143 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
1144         try {
1145                 
1146                 if (name.length() >= 6) {
1147                         string time = name.substr(0, 6);
1148                         unsigned int timeNum = m->fromBase36(time);
1149                         
1150                         int q1 = timeNum / 60;
1151                         int sec = timeNum - 60 * q1;
1152                         int q2 = q1 / 60;
1153                         int minute = q1 - 60 * q2;
1154                         int q3 = q2 / 24;
1155                         int hr = q2 - 24 * q3;
1156                         int q4 = q3 / 32;
1157                         int day = q3 - 32 * q4;
1158                         int q5 = q4 / 13;
1159                         int mon = q4 - 13 * q5;
1160                         int year = 2000 + q5;
1161                 
1162                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
1163                 }
1164                 
1165                 if (name.length() >= 9) {
1166                         region = name.substr(7, 2);
1167                 
1168                         string xyNum = name.substr(9);
1169                         unsigned int myXy = m->fromBase36(xyNum);
1170                         int x = myXy >> 12;
1171                         int y = myXy & 4095;
1172                 
1173                         xy = toString(x) + "_" + toString(y);
1174                 }
1175                 
1176                 return 0;
1177         }
1178         catch(exception& e) {
1179                 m->errorOut(e, "SffInfoCommand", "decodeName");
1180                 exit(1);
1181         }
1182 }
1183 //**********************************************************************************************************************
1184 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
1185         try {
1186         
1187                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
1188                 out << "Version: " << header.version << endl;
1189                 out << "Index Offset: " << header.indexOffset << endl;
1190                 out << "Index Length: " << header.indexLength << endl;
1191                 out << "Number of Reads: " << header.numReads << endl;
1192                 out << "Header Length: " << header.headerLength << endl;
1193                 out << "Key Length: " << header.keyLength << endl;
1194                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
1195                 out << "Format Code: " << header.flogramFormatCode << endl;
1196                 out << "Flow Chars: " << header.flowChars << endl;
1197                 out << "Key Sequence: " << header.keySequence << endl << endl;
1198                         
1199                 return 0;
1200         }
1201         catch(exception& e) {
1202                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
1203                 exit(1);
1204         }
1205 }
1206 //**********************************************************************************************************************
1207 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
1208         try {
1209                 
1210                 out << ">" << header.name << endl;
1211                 out << "Run Prefix: " << header.timestamp << endl;
1212                 out << "Region #:  " << header.region << endl;
1213                 out << "XY Location: " << header.xy << endl << endl;
1214                 
1215                 out << "Run Name:  " << endl;
1216                 out << "Analysis Name:  " << endl;
1217                 out << "Full Path: " << endl << endl;
1218                 
1219                 out << "Read Header Len: " << header.headerLength << endl;
1220                 out << "Name Length: " << header.nameLength << endl;
1221                 out << "# of Bases: " << header.numBases << endl;
1222                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
1223                 out << "Clip Qual Right: " << header.clipQualRight << endl;
1224                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
1225                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
1226                 
1227                 return 0;
1228         }
1229         catch(exception& e) {
1230                 m->errorOut(e, "SffInfoCommand", "printHeader");
1231                 exit(1);
1232         }
1233 }
1234 //**********************************************************************************************************************
1235 bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {
1236         try {
1237         bool okay = true;
1238         string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";
1239         
1240         if (header.clipQualLeft > read.bases.length()) {
1241             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
1242         }
1243         if (header.clipQualRight > read.bases.length()) {
1244             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
1245         }
1246         if (header.clipQualLeft > read.qualScores.size()) {
1247             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
1248         }
1249         if (header.clipQualRight > read.qualScores.size()) {
1250             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
1251         }
1252         
1253         if (okay == false) {
1254             m->mothurOut(message); m->mothurOutEndLine();
1255         }
1256         
1257                 return okay;
1258         }
1259         catch(exception& e) {
1260                 m->errorOut(e, "SffInfoCommand", "sanityCheck");
1261                 exit(1);
1262         }
1263 }
1264 //**********************************************************************************************************************
1265 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
1266         try {
1267                 out << "Flowgram: ";
1268                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
1269                 
1270                 out << endl <<  "Flow Indexes: ";
1271                 int sum = 0;
1272                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
1273                 
1274                 //make the bases you want to clip lowercase and the bases you want to keep upper case
1275         int endValue = header.clipQualRight;
1276                 if(endValue == 0){      endValue = read.bases.length(); }
1277                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
1278                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
1279                 for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
1280                 
1281                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
1282                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
1283         
1284                 
1285                 out << endl << endl;
1286                 
1287                 return 0;
1288         }
1289         catch(exception& e) {
1290                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
1291                 exit(1);
1292         }
1293 }
1294 //**********************************************************************************************************************
1295 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
1296         try {
1297                 string seq = read.bases;
1298                 
1299         if (trim) {
1300                         if(header.clipQualRight < header.clipQualLeft){
1301                                 if (header.clipQualRight == 0) { //don't trim right
1302                     seq = seq.substr(header.clipQualLeft-1);
1303                 }else {
1304                     seq = "NNNN";
1305                 }
1306                         }
1307                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
1308                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
1309                         }
1310                         else {
1311                                 seq = seq.substr(header.clipQualLeft-1);
1312                         }
1313                 }else{
1314                         //if you wanted the sfftxt then you already converted the bases to the right case
1315                         if (!sfftxt) {
1316                 int endValue = header.clipQualRight;
1317                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
1318                                 if(endValue == 0){      endValue = seq.length();        }
1319                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
1320                                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }
1321                                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
1322                         }
1323                 }
1324                 
1325                 out << ">" << header.name  << " xy=" << header.xy << endl;
1326                 out << seq << endl;
1327                 
1328                 return 0;
1329         }
1330         catch(exception& e) {
1331                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
1332                 exit(1);
1333         }
1334 }
1335
1336 //**********************************************************************************************************************
1337 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
1338         try {
1339                 
1340                 if (trim) {
1341                         if(header.clipQualRight < header.clipQualLeft){
1342                 if (header.clipQualRight == 0) { //don't trim right
1343                     out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
1344                     for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';       }       
1345                 }else {
1346                     out << ">" << header.name << " xy=" << header.xy << endl;
1347                     out << "0\t0\t0\t0";
1348                 }
1349                         }
1350                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
1351                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
1352                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
1353                         }
1354                         else{
1355                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
1356                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
1357                         }
1358                 }else{
1359                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
1360                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
1361                 }
1362                 
1363                 out << endl;
1364                 
1365                 return 0;
1366         }
1367         catch(exception& e) {
1368                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
1369                 exit(1);
1370         }
1371 }
1372
1373 //**********************************************************************************************************************
1374 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
1375         try {
1376         
1377         int endValue = header.clipQualRight;
1378         if (header.clipQualRight == 0) {
1379             endValue = read.flowIndex.size();
1380             if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }
1381         }
1382         if(endValue > header.clipQualLeft){
1383             
1384             int rightIndex = 0;
1385             for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];      }
1386             
1387             out << header.name << ' ' << rightIndex;
1388             for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
1389             out << endl;
1390         }
1391                 
1392                 
1393                 return 0;
1394         }
1395         catch(exception& e) {
1396                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
1397                 exit(1);
1398         }
1399 }
1400 //**********************************************************************************************************************
1401 int SffInfoCommand::readAccnosFile(string filename) {
1402         try {
1403                 //remove old names
1404                 seqNames.clear();
1405                 
1406                 ifstream in;
1407                 m->openInputFile(filename, in);
1408                 string name;
1409                 
1410                 while(!in.eof()){
1411                         in >> name; m->gobble(in);
1412                                                 
1413                         seqNames.insert(name);
1414                         
1415                         if (m->control_pressed) { seqNames.clear(); break; }
1416                 }
1417                 in.close();             
1418                 
1419                 return 0;
1420         }
1421         catch(exception& e) {
1422                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
1423                 exit(1);
1424         }
1425 }
1426 //**********************************************************************************************************************
1427 int SffInfoCommand::parseSffTxt() {
1428         try {
1429                 
1430                 ifstream inSFF;
1431                 m->openInputFile(sfftxtFilename, inSFF);
1432                 
1433                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
1434                 
1435                 //output file names
1436                 ofstream outFasta, outQual, outFlow;
1437                 string outFastaFileName, outQualFileName;
1438                 string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));
1439                 if (fileRoot.length() > 0) {
1440                         //rip off last .
1441                         fileRoot = fileRoot.substr(0, fileRoot.length()-1);
1442                         fileRoot = m->getRootName(fileRoot);
1443                 }
1444                 
1445         map<string, string> variables; 
1446                 variables["[filename]"] = fileRoot;
1447                 string sfftxtFileName = getOutputFileName("sfftxt",variables);
1448                 string outFlowFileName = getOutputFileName("flow",variables);
1449                 if (!trim) { variables["[tag]"] = "raw"; }
1450                 outFastaFileName = getOutputFileName("fasta",variables);
1451         outQualFileName = getOutputFileName("qfile",variables);
1452                 
1453                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
1454                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
1455                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
1456                 
1457                 //read common header
1458                 string commonHeader = m->getline(inSFF);
1459                 string magicNumber = m->getline(inSFF); 
1460                 string version = m->getline(inSFF);
1461                 string indexOffset = m->getline(inSFF);
1462                 string indexLength = m->getline(inSFF);
1463                 int numReads = parseHeaderLineToInt(inSFF);
1464                 string headerLength = m->getline(inSFF);
1465                 string keyLength = m->getline(inSFF);
1466                 int numFlows = parseHeaderLineToInt(inSFF);
1467                 string flowgramCode = m->getline(inSFF);
1468                 string flowChars = m->getline(inSFF);
1469                 string keySequence = m->getline(inSFF);
1470                 m->gobble(inSFF);
1471                 
1472                 string seqName;
1473                 
1474                 if (flow)       {       outFlow << numFlows << endl;    }
1475                 
1476                 for(int i=0;i<numReads;i++){
1477                         
1478                         //sanity check
1479                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
1480                         
1481                         Header header;
1482                         
1483                         //parse read header
1484                         inSFF >> seqName;
1485                         seqName = seqName.substr(1);
1486                         m->gobble(inSFF);
1487                         header.name = seqName;
1488                         
1489                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
1490                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
1491                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
1492                         m->gobble(inSFF);
1493                                 
1494                         string runName = parseHeaderLineToString(inSFF);
1495                         string analysisName = parseHeaderLineToString(inSFF);
1496                         string fullPath = parseHeaderLineToString(inSFF);
1497                         m->gobble(inSFF);
1498                         
1499                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
1500                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
1501                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
1502                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
1503                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
1504                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
1505                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
1506                         m->gobble(inSFF);
1507                                 
1508                         seqRead read;
1509                         
1510                         //parse read
1511                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
1512                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
1513                         
1514                         //adjust for print
1515                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
1516                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
1517                         read.flowIndex = flowIndicesAdjusted;
1518                         
1519                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
1520                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
1521                         m->gobble(inSFF);
1522                                         
1523                         //if you have provided an accosfile and this seq is not in it, then dont print
1524                         bool print = true;
1525                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
1526                         
1527                         //print 
1528                         if (print) {
1529                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
1530                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
1531                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
1532                         }
1533                         
1534                         //report progress
1535                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
1536                         
1537                         if (m->control_pressed) {  break;  }
1538                 }
1539                 
1540                 //report progress
1541                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
1542                 
1543                 inSFF.close();
1544                 
1545                 if (fasta)      {  outFasta.close();    }
1546                 if (qual)       {  outQual.close();             }
1547                 if (flow)       {  outFlow.close();             }
1548                 
1549                 return 0;
1550         }
1551         catch(exception& e) {
1552                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
1553                 exit(1);
1554         }
1555 }
1556 //**********************************************************************************************************************
1557
1558 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
1559         try {
1560                 int number;
1561                 
1562                 while (!file.eof())     {
1563                         
1564                         char c = file.get(); 
1565                         if (c == ':'){
1566                                 file >> number;
1567                                 break;
1568                         }
1569                         
1570                 }
1571                 m->gobble(file);
1572                 return number;
1573         }
1574         catch(exception& e) {
1575                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
1576                 exit(1);
1577         }
1578         
1579 }
1580
1581 //**********************************************************************************************************************
1582
1583 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
1584         try {
1585                 string text;
1586                 
1587                 while (!file.eof())     {
1588                         char c = file.get(); 
1589                         
1590                         if (c == ':'){
1591                                 //m->gobble(file);
1592                                 //text = m->getline(file);      
1593                                 file >> text;
1594                                 break;
1595                         }
1596                 }
1597                 m->gobble(file);
1598                 
1599                 return text;
1600         }
1601         catch(exception& e) {
1602                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1603                 exit(1);
1604         }
1605 }
1606
1607 //**********************************************************************************************************************
1608
1609 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1610         try {
1611                 vector<unsigned short> floatVector(length);
1612                 
1613                 while (!file.eof())     {
1614                         char c = file.get(); 
1615                         if (c == ':'){
1616                                 float temp;
1617                                 for(int i=0;i<length;i++){
1618                                         file >> temp;
1619                                         floatVector[i] = temp * 100;
1620                                 }
1621                                 break;
1622                         }
1623                 }
1624                 m->gobble(file);        
1625                 return floatVector;
1626         }
1627         catch(exception& e) {
1628                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1629                 exit(1);
1630         }
1631 }
1632
1633 //**********************************************************************************************************************
1634
1635 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1636         try {
1637                 vector<unsigned int> intVector(length);
1638                 
1639                 while (!file.eof())     {
1640                         char c = file.get(); 
1641                         if (c == ':'){
1642                                 for(int i=0;i<length;i++){
1643                                         file >> intVector[i];
1644                                 }
1645                                 break;
1646                         }
1647                 }
1648                 m->gobble(file);        
1649                 return intVector;
1650         }
1651         catch(exception& e) {
1652                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1653                 exit(1);
1654         }
1655 }
1656 //***************************************************************************************************************
1657
1658 bool SffInfoCommand::readOligos(string oligoFile){
1659         try {
1660         filehandles.clear();
1661         numSplitReads.clear();
1662         filehandlesHeaders.clear();
1663         
1664                 ifstream inOligos;
1665                 m->openInputFile(oligoFile, inOligos);
1666                 
1667                 string type, oligo, group;
1668         
1669                 int indexPrimer = 0;
1670                 int indexBarcode = 0;
1671                 
1672                 while(!inOligos.eof()){
1673             
1674                         inOligos >> type; 
1675             
1676                         if(type[0] == '#'){
1677                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1678                                 m->gobble(inOligos);
1679                         }
1680                         else{
1681                                 m->gobble(inOligos);
1682                                 //make type case insensitive
1683                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1684                                 
1685                                 inOligos >> oligo;
1686                                 
1687                                 for(int i=0;i<oligo.length();i++){
1688                                         oligo[i] = toupper(oligo[i]);
1689                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1690                                 }
1691                                 
1692                                 if(type == "FORWARD"){
1693                                         group = "";
1694                                         
1695                                         // get rest of line in case there is a primer name
1696                                         while (!inOligos.eof()) {       
1697                                                 char c = inOligos.get(); 
1698                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1699                                                 else if (c == 32 || c == 9){;} //space or tab
1700                                                 else {  group += c;  }
1701                                         } 
1702                                         
1703                                         //check for repeat barcodes
1704                                         map<string, int>::iterator itPrime = primers.find(oligo);
1705                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1706                                         
1707                                         primers[oligo]=indexPrimer; indexPrimer++;              
1708                                         primerNameVector.push_back(group);
1709                                 }else if(type == "REVERSE"){
1710                                         //Sequence oligoRC("reverse", oligo);
1711                                         //oligoRC.reverseComplement();
1712                     string oligoRC = reverseOligo(oligo);
1713                                         revPrimer.push_back(oligoRC);
1714                                 }
1715                                 else if(type == "BARCODE"){
1716                                         inOligos >> group;
1717                                         
1718                                         //check for repeat barcodes
1719                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1720                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1721                     
1722                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1723                                         barcodeNameVector.push_back(group);
1724                                 }else if(type == "LINKER"){
1725                                         linker.push_back(oligo);
1726                                 }else if(type == "SPACER"){
1727                                         spacer.push_back(oligo);
1728                                 }
1729                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1730                         }
1731                         m->gobble(inOligos);
1732                 }       
1733                 inOligos.close();
1734                 
1735                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }
1736                 
1737                 //add in potential combos
1738                 if(barcodeNameVector.size() == 0){
1739                         barcodes[""] = 0;
1740                         barcodeNameVector.push_back("");                        
1741                 }
1742                 
1743                 if(primerNameVector.size() == 0){
1744                         primers[""] = 0;
1745                         primerNameVector.push_back("");                 
1746                 }
1747                 
1748                 filehandles.resize(barcodeNameVector.size());
1749                 for(int i=0;i<filehandles.size();i++){
1750                         filehandles[i].assign(primerNameVector.size(), "");
1751                 }
1752                         
1753                 if(split > 1){
1754                         set<string> uniqueNames; //used to cleanup outputFileNames
1755                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1756                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1757                                         
1758                                         string primerName = primerNameVector[itPrimer->second];
1759                                         string barcodeName = barcodeNameVector[itBar->second];
1760                                         
1761                                         string comboGroupName = "";
1762                                         string fastaFileName = "";
1763                                         string qualFileName = "";
1764                                         string nameFileName = "";
1765                                         
1766                                         if(primerName == ""){
1767                                                 comboGroupName = barcodeNameVector[itBar->second];
1768                                         }
1769                                         else{
1770                                                 if(barcodeName == ""){
1771                                                         comboGroupName = primerNameVector[itPrimer->second];
1772                                                 }
1773                                                 else{
1774                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1775                                                 }
1776                                         }
1777                                         
1778                                         ofstream temp;
1779                     map<string, string> variables; 
1780                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
1781                     variables["[group]"] = comboGroupName;
1782                                         string thisFilename = getOutputFileName("sff",variables);
1783                                         if (uniqueNames.count(thisFilename) == 0) {
1784                                                 outputNames.push_back(thisFilename);
1785                                                 outputTypes["sff"].push_back(thisFilename);
1786                                                 uniqueNames.insert(thisFilename);
1787                                         }
1788                                         
1789                                         filehandles[itBar->second][itPrimer->second] = thisFilename;
1790                                         temp.open(thisFilename.c_str(), ios::binary);           temp.close();
1791                                 }
1792                         }
1793                 }
1794                 numFPrimers = primers.size();
1795         numLinkers = linker.size();
1796         numSpacers = spacer.size();
1797         map<string, string> variables; 
1798         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
1799         variables["[group]"] = "scrap";
1800                 noMatchFile = getOutputFileName("sff",variables);
1801         m->mothurRemove(noMatchFile);
1802         
1803                 bool allBlank = true;
1804                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1805                         if (barcodeNameVector[i] != "") {
1806                                 allBlank = false;
1807                                 break;
1808                         }
1809                 }
1810                 for (int i = 0; i < primerNameVector.size(); i++) {
1811                         if (primerNameVector[i] != "") {
1812                                 allBlank = false;
1813                                 break;
1814                         }
1815                 }
1816                 
1817         filehandlesHeaders.resize(filehandles.size());
1818         numSplitReads.resize(filehandles.size());
1819         for (int i = 0; i < filehandles.size(); i++) { 
1820             numSplitReads[i].resize(filehandles[i].size(), 0); 
1821             for (int j = 0; j < filehandles[i].size(); j++) {
1822                 filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");
1823             }
1824         }
1825                              
1826                 if (allBlank) {
1827                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();
1828                         split = 1;
1829                         return false;
1830                 }
1831                 
1832                 return true;
1833                 
1834         }
1835         catch(exception& e) {
1836                 m->errorOut(e, "SffInfoCommand", "readOligos");
1837                 exit(1);
1838         }
1839 }
1840 //********************************************************************/
1841 string SffInfoCommand::reverseOligo(string oligo){
1842         try {
1843         string reverse = "";
1844         
1845         for(int i=oligo.length()-1;i>=0;i--){
1846             
1847             if(oligo[i] == 'A')         {       reverse += 'T'; }
1848             else if(oligo[i] == 'T'){   reverse += 'A'; }
1849             else if(oligo[i] == 'U'){   reverse += 'A'; }
1850             
1851             else if(oligo[i] == 'G'){   reverse += 'C'; }
1852             else if(oligo[i] == 'C'){   reverse += 'G'; }
1853             
1854             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1855             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1856             
1857             else if(oligo[i] == 'M'){   reverse += 'K'; }
1858             else if(oligo[i] == 'K'){   reverse += 'M'; }
1859             
1860             else if(oligo[i] == 'W'){   reverse += 'W'; }
1861             else if(oligo[i] == 'S'){   reverse += 'S'; }
1862             
1863             else if(oligo[i] == 'B'){   reverse += 'V'; }
1864             else if(oligo[i] == 'V'){   reverse += 'B'; }
1865             
1866             else if(oligo[i] == 'D'){   reverse += 'H'; }
1867             else if(oligo[i] == 'H'){   reverse += 'D'; }
1868             
1869             else                                                {       reverse += 'N'; }
1870         }
1871         
1872         
1873         return reverse;
1874     }
1875         catch(exception& e) {
1876                 m->errorOut(e, "SffInfoCommand", "reverseOligo");
1877                 exit(1);
1878         }
1879 }
1880
1881 //**********************************************************************************************************************
1882
1883
1884                                 
1885