]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
fixed bug in phylo.diversity rooting. added filename patterns and create filename...
[mothur.git] / sffinfocommand.cpp
1 /*
2  *  sffinfocommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/7/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sffinfocommand.h"
11 #include "endiannessmacros.h"
12 #include "trimoligos.h"
13 #include "sequence.hpp"
14 #include "qualityscores.h"
15
16 //**********************************************************************************************************************
17 vector<string> SffInfoCommand::setParameters(){ 
18         try {           
19                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);
20         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);
21                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
22                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);
23                 CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);
24                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
25                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
26                 CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);
27         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
28                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
29         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
30                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
31         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
32                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
33                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
34                 
35                 vector<string> myArray;
36                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
37                 return myArray;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "SffInfoCommand", "setParameters");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 string SffInfoCommand::getHelpString(){ 
46         try {
47                 string helpString = "";
48                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
49                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";
50                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
51                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
52                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
53         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";
54         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
55                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
56                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
57         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
58                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
59                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";
60                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
61                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
62                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
63                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
64                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
65                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";
66                 return helpString;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "SffInfoCommand", "getHelpString");
70                 exit(1);
71         }
72 }
73
74 //**********************************************************************************************************************
75 string SffInfoCommand::getOutputPattern(string type) {
76     try {
77         string pattern = "";
78         
79         if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }
80         else if (type == "flow")    {   pattern =  "[filename],flow";   }
81         else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }
82         else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }
83         else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }
84         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
85         
86         return pattern;
87     }
88     catch(exception& e) {
89         m->errorOut(e, "SffInfoCommand", "getOutputPattern");
90         exit(1);
91     }
92 }
93 //**********************************************************************************************************************
94 SffInfoCommand::SffInfoCommand(){       
95         try {
96                 abort = true; calledHelp = true; 
97                 setParameters();
98                 vector<string> tempOutNames;
99                 outputTypes["fasta"] = tempOutNames;
100                 outputTypes["flow"] = tempOutNames;
101                 outputTypes["sfftxt"] = tempOutNames;
102                 outputTypes["qfile"] = tempOutNames;
103         outputTypes["sff"] = tempOutNames;
104         }
105         catch(exception& e) {
106                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
107                 exit(1);
108         }
109 }
110 //**********************************************************************************************************************
111
112 SffInfoCommand::SffInfoCommand(string option)  {
113         try {
114                 abort = false; calledHelp = false;   
115                 hasAccnos = false; hasOligos = false;
116         split = 1;
117                 
118                 //allow user to run help
119                 if(option == "help") { help(); abort = true; calledHelp = true; }
120                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
121                 
122                 else {
123                         //valid paramters for this command
124                         vector<string> myArray = setParameters();
125                         
126                         OptionParser parser(option);
127                         map<string, string> parameters = parser.getParameters();
128                         
129                         ValidParameters validParameter;
130                         //check to make sure all parameters are valid for command
131                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         //initialize outputTypes
136                         vector<string> tempOutNames;
137                         outputTypes["fasta"] = tempOutNames;
138                         outputTypes["flow"] = tempOutNames;
139                         outputTypes["sfftxt"] = tempOutNames;
140                         outputTypes["qfile"] = tempOutNames;
141             outputTypes["sff"] = tempOutNames;
142                         
143                         //if the user changes the output directory command factory will send this info to us in the output parameter 
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
148
149                         sffFilename = validParameter.validFile(parameters, "sff", false);
150                         if (sffFilename == "not found") { sffFilename = "";  }
151                         else { 
152                                 m->splitAtDash(sffFilename, filenames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < filenames.size(); i++) {
156                                         bool ignore = false;
157                                         if (filenames[i] == "current") { 
158                                                 filenames[i] = m->getSFFFile(); 
159                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }
160                                                 else {  
161                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
162                                                         //erase from file list
163                                                         filenames.erase(filenames.begin()+i);
164                                                         i--;
165                                                 }
166                                         }
167                                         
168                                         if (!ignore) {
169                                                 if (inputDir != "") {
170                                                         string path = m->hasPath(filenames[i]);
171                                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }
173                                                 }
174                 
175                                                 ifstream in;
176                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
177                                         
178                                                 //if you can't open it, try default location
179                                                 if (ableToOpen == 1) {
180                                                         if (m->getDefaultPath() != "") { //default path is set
181                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
182                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
183                                                                 ifstream in2;
184                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
185                                                                 in2.close();
186                                                                 filenames[i] = tryPath;
187                                                         }
188                                                 }
189                                                 
190                                                 //if you can't open it, try default location
191                                                 if (ableToOpen == 1) {
192                                                         if (m->getOutputDir() != "") { //default path is set
193                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
194                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
195                                                                 ifstream in2;
196                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
197                                                                 in2.close();
198                                                                 filenames[i] = tryPath;
199                                                         }
200                                                 }
201                                                 
202                                                 in.close();
203                                                 
204                                                 if (ableToOpen == 1) { 
205                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
206                                                         //erase from file list
207                                                         filenames.erase(filenames.begin()+i);
208                                                         i--;
209                                                 }else { m->setSFFFile(filenames[i]); }
210                                         }
211                                 }
212                                 
213                                 //make sure there is at least one valid file left
214                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
215                         }
216                         
217                         accnosName = validParameter.validFile(parameters, "accnos", false);
218                         if (accnosName == "not found") { accnosName = "";  }
219                         else { 
220                                 hasAccnos = true;
221                                 m->splitAtDash(accnosName, accnosFileNames);
222                                 
223                                 //go through files and make sure they are good, if not, then disregard them
224                                 for (int i = 0; i < accnosFileNames.size(); i++) {
225                                         bool ignore = false;
226                                         if (accnosFileNames[i] == "current") { 
227                                                 accnosFileNames[i] = m->getAccnosFile(); 
228                                                 if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
229                                                 else {  
230                                                         m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
231                                                         //erase from file list
232                                                         accnosFileNames.erase(accnosFileNames.begin()+i);
233                                                         i--;
234                                                 }
235                                         }
236                                         
237                                         if (!ignore) {
238                                         
239                                                 if (inputDir != "") {
240                                                         string path = m->hasPath(accnosFileNames[i]);
241                                                         //if the user has not given a path then, add inputdir. else leave path alone.
242                                                         if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
243                                                 }
244                 
245                                                 ifstream in;
246                                                 int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
247                                         
248                                                 //if you can't open it, try default location
249                                                 if (ableToOpen == 1) {
250                                                         if (m->getDefaultPath() != "") { //default path is set
251                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
252                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
253                                                                 ifstream in2;
254                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
255                                                                 in2.close();
256                                                                 accnosFileNames[i] = tryPath;
257                                                         }
258                                                 }
259                                                 //if you can't open it, try default location
260                                                 if (ableToOpen == 1) {
261                                                         if (m->getOutputDir() != "") { //default path is set
262                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
263                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
264                                                                 ifstream in2;
265                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
266                                                                 in2.close();
267                                                                 accnosFileNames[i] = tryPath;
268                                                         }
269                                                 }
270                                                 in.close();
271                                                 
272                                                 if (ableToOpen == 1) { 
273                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
274                                                         //erase from file list
275                                                         accnosFileNames.erase(accnosFileNames.begin()+i);
276                                                         i--;
277                                                 }
278                                         }
279                                 }
280                                 
281                                 //make sure there is at least one valid file left
282                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
283                         }
284             
285             oligosfile = validParameter.validFile(parameters, "oligos", false);
286                         if (oligosfile == "not found") { oligosfile = "";  }
287                         else { 
288                                 hasOligos = true;
289                                 m->splitAtDash(oligosfile, oligosFileNames);
290                                 
291                                 //go through files and make sure they are good, if not, then disregard them
292                                 for (int i = 0; i < oligosFileNames.size(); i++) {
293                                         bool ignore = false;
294                                         if (oligosFileNames[i] == "current") { 
295                                                 oligosFileNames[i] = m->getOligosFile(); 
296                                                 if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
297                                                 else {  
298                                                         m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
299                                                         //erase from file list
300                                                         oligosFileNames.erase(oligosFileNames.begin()+i);
301                                                         i--;
302                                                 }
303                                         }
304                                         
305                                         if (!ignore) {
306                         
307                                                 if (inputDir != "") {
308                                                         string path = m->hasPath(oligosFileNames[i]);
309                                                         //if the user has not given a path then, add inputdir. else leave path alone.
310                                                         if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }
311                                                 }
312                         
313                                                 ifstream in;
314                                                 int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");
315                         
316                                                 //if you can't open it, try default location
317                                                 if (ableToOpen == 1) {
318                                                         if (m->getDefaultPath() != "") { //default path is set
319                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);
320                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
321                                                                 ifstream in2;
322                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
323                                                                 in2.close();
324                                                                 oligosFileNames[i] = tryPath;
325                                                         }
326                                                 }
327                                                 //if you can't open it, try default location
328                                                 if (ableToOpen == 1) {
329                                                         if (m->getOutputDir() != "") { //default path is set
330                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);
331                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
332                                                                 ifstream in2;
333                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
334                                                                 in2.close();
335                                                                 oligosFileNames[i] = tryPath;
336                                                         }
337                                                 }
338                                                 in.close();
339                                                 
340                                                 if (ableToOpen == 1) { 
341                                                         m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
342                                                         //erase from file list
343                                                         oligosFileNames.erase(oligosFileNames.begin()+i);
344                                                         i--;
345                                                 }
346                                         }
347                                 }
348                                 
349                                 //make sure there is at least one valid file left
350                                 if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }
351                         }
352
353                         if (hasOligos) {
354                 split = 2;
355                                 if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }
356                         }
357             
358                         if (hasAccnos) {
359                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
360                         }
361                         
362                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
363                         qual = m->isTrue(temp); 
364                         
365                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
366                         fasta = m->isTrue(temp); 
367                         
368                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }
369                         flow = m->isTrue(temp); 
370                         
371                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
372                         trim = m->isTrue(temp); 
373             
374             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
375                         m->mothurConvert(temp, bdiffs);
376                         
377                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
378                         m->mothurConvert(temp, pdiffs);
379             
380             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
381                         m->mothurConvert(temp, ldiffs);
382             
383             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
384                         m->mothurConvert(temp, sdiffs);
385                         
386                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
387                         m->mothurConvert(temp, tdiffs);
388                         
389                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
390             
391                         temp = validParameter.validFile(parameters, "sfftxt", false);                           
392                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
393                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
394                         else {
395                                 //you are a filename
396                                 if (inputDir != "") {
397                                         map<string,string>::iterator it = parameters.find("sfftxt");
398                                         //user has given a template file
399                                         if(it != parameters.end()){ 
400                                                 string path = m->hasPath(it->second);
401                                                 //if the user has not given a path then, add inputdir. else leave path alone.
402                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
403                                         }
404                                 }
405                                 
406                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
407                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
408                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
409                         }
410                         
411                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  
412                                 //if there is a current sff file, use it
413                                 string filename = m->getSFFFile(); 
414                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
415                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }
416                         }
417             
418             
419                 }
420         }
421         catch(exception& e) {
422                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
423                 exit(1);
424         }
425 }
426 //**********************************************************************************************************************
427 int SffInfoCommand::execute(){
428         try {
429                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
430                 
431                 for (int s = 0; s < filenames.size(); s++) {
432                         
433                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
434                         
435                         int start = time(NULL);
436                         
437             filenames[s] = m->getFullPathName(filenames[s]);
438                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
439                         
440                         string accnos = "";
441                         if (hasAccnos) { accnos = accnosFileNames[s]; }
442             
443             string oligos = "";
444             if (hasOligos) { oligos = oligosFileNames[s]; }
445                         
446                         int numReads = extractSffInfo(filenames[s], accnos, oligos);
447
448                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
449                 }
450                 
451                 if (sfftxtFilename != "") {  parseSffTxt(); }
452                 
453                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
454                 
455                 //set fasta file as new current fastafile
456                 string current = "";
457                 itTypes = outputTypes.find("fasta");
458                 if (itTypes != outputTypes.end()) {
459                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
460                 }
461                 
462                 itTypes = outputTypes.find("qfile");
463                 if (itTypes != outputTypes.end()) {
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
465                 }
466                 
467                 itTypes = outputTypes.find("flow");
468                 if (itTypes != outputTypes.end()) {
469                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }
470                 }
471                 
472                 //report output filenames
473                 m->mothurOutEndLine();
474                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
475                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
476                 m->mothurOutEndLine();
477
478                 return 0;
479         }
480         catch(exception& e) {
481                 m->errorOut(e, "SffInfoCommand", "execute");
482                 exit(1);
483         }
484 }
485 //**********************************************************************************************************************
486 int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
487         try {
488                 currentFileName = input;
489                 if (outputDir == "") {  outputDir += m->hasPath(input); }
490                 
491                 if (accnos != "")       {  readAccnosFile(accnos);  }
492                 else                            {       seqNames.clear();               }
493          
494         if (oligos != "")   {   readOligos(oligos);  split = 2;   }
495
496                 ofstream outSfftxt, outFasta, outQual, outFlow;
497                 string outFastaFileName, outQualFileName;
498         string rootName = outputDir + m->getRootName(m->getSimpleName(input));
499         if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
500         
501         map<string, string> variables; 
502                 variables["[filename]"] = rootName;
503                 string sfftxtFileName = getOutputFileName("sfftxt",variables);
504                 string outFlowFileName = getOutputFileName("flow",variables);
505                 if (!trim) { variables["[tag]"] = "raw"; }
506                 outFastaFileName = getOutputFileName("fasta",variables);
507         outQualFileName = getOutputFileName("qfile",variables);
508         
509                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
510                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
511                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
512                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
513                 
514                 ifstream in;
515                 in.open(input.c_str(), ios::binary);
516                 
517                 CommonHeader header; 
518                 readCommonHeader(in, header);
519         
520                 int count = 0;
521                 mycount = 0;
522                 
523                 //check magic number and version
524                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
525                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
526         
527                 //print common header
528                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
529                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
530                         
531                 //read through the sff file
532                 while (!in.eof()) {
533                         
534                         bool print = true;
535                                                 
536                         //read data
537                         seqRead read;  Header readheader;
538                         readSeqData(in, read, header.numFlowsPerRead, readheader);
539             bool okay = sanityCheck(readheader, read);
540             if (!okay) { break; }
541             
542                         //if you have provided an accosfile and this seq is not in it, then dont print
543                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
544                         
545                         //print 
546                         if (print) {
547                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
548                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
549                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }
550                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
551                         }
552                         
553                         count++;
554                         mycount++;
555         
556                         //report progress
557                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
558                 
559                         if (m->control_pressed) { count = 0; break;   }
560                         
561                         if (count >= header.numReads) { break; }
562                 }
563                 
564                 //report progress
565                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
566                 
567                 in.close();
568                 
569                 if (sfftxt) {  outSfftxt.close();       }
570                 if (fasta)      {  outFasta.close();    }
571                 if (qual)       {  outQual.close();             }
572                 if (flow)       {  outFlow.close();             }
573                 
574         if (split > 1) {
575             //create new common headers for each file with the correct number of reads
576             adjustCommonHeader(header);
577             
578                         map<string, string>::iterator it;
579                         set<string> namesToRemove;
580                         for(int i=0;i<filehandles.size();i++){
581                                 for(int j=0;j<filehandles[0].size();j++){
582                                         if (filehandles[i][j] != "") {
583                                                 if (namesToRemove.count(filehandles[i][j]) == 0) {
584                                                         if(m->isBlank(filehandles[i][j])){
585                                                                 m->mothurRemove(filehandles[i][j]);
586                                 m->mothurRemove(filehandlesHeaders[i][j]);
587                                                                 namesToRemove.insert(filehandles[i][j]);
588                             }
589                                                 }
590                                         }
591                                 }
592                         }
593             
594             //append new header to reads
595             for (int i = 0; i < filehandles.size(); i++) {
596                 for (int j = 0; j < filehandles[i].size(); j++) {
597                     m->appendFiles(filehandles[i][j], filehandlesHeaders[i][j]);
598                     m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);
599                     m->mothurRemove(filehandlesHeaders[i][j]);
600                     if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }
601                 }
602             }
603                         
604                         //remove names for outputFileNames, just cleans up the output
605                         for(int i = 0; i < outputNames.size(); i++) { 
606                 if (namesToRemove.count(outputNames[i]) != 0) { 
607                     outputNames.erase(outputNames.begin()+i);
608                     i--;
609                 } 
610             }
611             
612             if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }
613             else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }
614         }
615         
616                 return count;
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");
620                 exit(1);
621         }
622 }
623 //**********************************************************************************************************************
624 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
625         try {
626         
627                 if (!in.eof()) {
628
629                         //read magic number
630                         char buffer[4];
631                         in.read(buffer, 4);
632                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
633             
634                         //read version
635                         char buffer9[4];
636                         in.read(buffer9, 4);
637                         header.version = "";
638                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }
639     
640                         //read offset
641                         char buffer2 [8];
642                         in.read(buffer2, 8);
643                         header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));
644                         
645                         //read index length
646                         char buffer3 [4];
647                         in.read(buffer3, 4);
648                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
649                         
650                         //read num reads
651                         char buffer4 [4];
652                         in.read(buffer4, 4);
653                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
654                                 
655                         //read header length
656                         char buffer5 [2];
657                         in.read(buffer5, 2);
658                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
659                                         
660                         //read key length
661                         char buffer6 [2];
662                         in.read(buffer6, 2);
663                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
664                         
665                         //read number of flow reads
666                         char buffer7 [2];
667                         in.read(buffer7, 2);
668                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
669                                 
670                         //read format code
671                         char buffer8 [1];
672                         in.read(buffer8, 1);
673                         header.flogramFormatCode = (int)(buffer8[0]);
674                         
675                         //read flow chars
676                         char* tempBuffer = new char[header.numFlowsPerRead];
677                         in.read(&(*tempBuffer), header.numFlowsPerRead); 
678                         header.flowChars = tempBuffer;
679                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
680                         delete[] tempBuffer;
681                         
682                         //read key
683                         char* tempBuffer2 = new char[header.keyLength];
684                         in.read(&(*tempBuffer2), header.keyLength);
685                         header.keySequence = tempBuffer2;
686                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
687                         delete[] tempBuffer2;
688                         
689                         /* Pad to 8 chars */
690                         unsigned long long spotInFile = in.tellg();
691                         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
692                         in.seekg(spot);
693             
694         }else{
695                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
696                 }
697         
698                 return 0;
699         
700         }
701         catch(exception& e) {
702                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");
703                 exit(1);
704         }
705 }
706 //**********************************************************************************************************************
707 int SffInfoCommand::adjustCommonHeader(CommonHeader header){
708         try {
709
710         char* mybuffer = new char[4];
711         ifstream in;
712         in.open(currentFileName.c_str(), ios::binary);
713         
714         //magic number
715         in.read(mybuffer,4);
716         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
717             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
718                 ofstream out;
719                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
720                 out.write(mybuffer, in.gcount()); 
721                 out.close();
722             }
723         }
724         delete[] mybuffer;
725         
726         //version
727         mybuffer = new char[4];
728         in.read(mybuffer,4);
729         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
730             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
731                 ofstream out;
732                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
733                 out.write(mybuffer, in.gcount()); 
734                 out.close();
735             }
736         }
737         delete[] mybuffer;
738         
739         //offset
740         mybuffer = new char[8];
741         in.read(mybuffer,8);
742         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
743             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
744                 ofstream out;
745                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
746                 out.write(mybuffer, in.gcount()); 
747                 out.close();
748             }
749         }
750         delete[] mybuffer;
751             
752                         
753         //read index length
754                 mybuffer = new char[4];
755         in.read(mybuffer,4);
756         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
757             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
758                 ofstream out;
759                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
760                 out.write(mybuffer, in.gcount()); 
761                 out.close();
762             }
763         }
764         delete[] mybuffer;
765                 
766         //change num reads
767         mybuffer = new char[4];
768         in.read(mybuffer,4);
769         delete[] mybuffer;
770         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
771             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
772                 ofstream out;
773                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
774                 //convert number of reads to 4 byte char*
775                 char* thisbuffer = new char[4];
776                 thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;
777                 thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;
778                 thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;
779                 thisbuffer[3] = numSplitReads[i][j] & 0xFF;
780                 out.write(thisbuffer, 4);
781                 out.close();
782                 delete[] thisbuffer;
783             }
784         }
785             
786         //read header length
787         mybuffer = new char[2];
788         in.read(mybuffer,2);
789         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
790             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
791                 ofstream out;
792                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
793                 out.write(mybuffer, in.gcount()); 
794                 out.close();
795             }
796         }
797         delete[] mybuffer;
798             
799         //read key length
800         mybuffer = new char[2];
801         in.read(mybuffer,2);
802         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
803             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
804                 ofstream out;
805                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
806                 out.write(mybuffer, in.gcount()); 
807                 out.close();
808             }
809         }
810         delete[] mybuffer;
811                         
812         //read number of flow reads
813         mybuffer = new char[2];
814         in.read(mybuffer,2);
815         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
816             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
817                 ofstream out;
818                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
819                 out.write(mybuffer, in.gcount()); 
820                 out.close();
821             }
822         }
823         delete[] mybuffer;
824             
825         //read format code
826         mybuffer = new char[1];
827         in.read(mybuffer,1);
828         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
829             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
830                 ofstream out;
831                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
832                 out.write(mybuffer, in.gcount()); 
833                 out.close();
834             }
835         }
836         delete[] mybuffer;
837                         
838         //read flow chars
839         mybuffer = new char[header.numFlowsPerRead];
840         in.read(mybuffer,header.numFlowsPerRead);
841         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
842             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
843                 ofstream out;
844                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
845                 out.write(mybuffer, in.gcount()); 
846                 out.close();
847             }
848         }
849         delete[] mybuffer;
850                         
851         //read key
852         mybuffer = new char[header.keyLength];
853         in.read(mybuffer,header.keyLength);
854         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
855             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
856                 ofstream out;
857                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
858                 out.write(mybuffer, in.gcount()); 
859                 out.close();
860             }
861         }
862         delete[] mybuffer;
863         
864                         
865         /* Pad to 8 chars */
866         unsigned long long spotInFile = in.tellg();
867         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
868         in.seekg(spot);
869         
870         mybuffer = new char[spot-spotInFile];
871         for (int i = 0; i < filehandlesHeaders.size(); i++) { 
872             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
873                 ofstream out;
874                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
875                 out.write(mybuffer, spot-spotInFile); 
876                 out.close();
877             }
878         }
879         delete[] mybuffer;
880         in.close();
881                 return 0;
882         
883         }
884         catch(exception& e) {
885                 m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");
886                 exit(1);
887         }
888 }
889 //**********************************************************************************************************************
890 int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){
891         try {
892         unsigned long long startSpotInFile = in.tellg();
893                 if (!in.eof()) {
894             
895             /*****************************************/
896             //read header
897             
898             //read header length
899                         char buffer [2];
900                         in.read(buffer, 2);
901                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
902             
903                         //read name length
904                         char buffer2 [2];
905                         in.read(buffer2, 2);
906                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
907             
908                         //read num bases
909                         char buffer3 [4];
910                         in.read(buffer3, 4);
911                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
912                         
913                         //read clip qual left
914                         char buffer4 [2];
915                         in.read(buffer4, 2);
916                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
917                         header.clipQualLeft = 5; 
918                         
919                         //read clip qual right
920                         char buffer5 [2];
921                         in.read(buffer5, 2);
922                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
923                         
924                         //read clipAdapterLeft
925                         char buffer6 [2];
926                         in.read(buffer6, 2);
927                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
928             
929                         //read clipAdapterRight
930                         char buffer7 [2];
931                         in.read(buffer7, 2);
932                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
933             
934                         //read name
935                         char* tempBuffer = new char[header.nameLength];
936                         in.read(&(*tempBuffer), header.nameLength);
937                         header.name = tempBuffer;
938                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
939                         delete[] tempBuffer;
940                         
941                         //extract info from name
942                         decodeName(header.timestamp, header.region, header.xy, header.name);
943                         
944                         /* Pad to 8 chars */
945                         unsigned long long spotInFile = in.tellg();
946                         unsigned long long spot = (spotInFile + 7)& ~7;
947                         in.seekg(spot);
948
949             /*****************************************/
950             //sequence read 
951             
952                         //read flowgram
953                         read.flowgram.resize(numFlowReads);
954                         for (int i = 0; i < numFlowReads; i++) {  
955                                 char buffer [2];
956                                 in.read(buffer, 2);
957                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
958                         }
959             
960                         //read flowIndex
961                         read.flowIndex.resize(header.numBases);
962                         for (int i = 0; i < header.numBases; i++) {  
963                                 char temp[1];
964                                 in.read(temp, 1);
965                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
966                         }
967         
968                         //read bases
969                         char* tempBuffer6 = new char[header.numBases];
970                         in.read(&(*tempBuffer6), header.numBases);
971                         read.bases = tempBuffer6;
972                         if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }
973                         delete[] tempBuffer6;
974
975                         //read qual scores
976                         read.qualScores.resize(header.numBases);
977                         for (int i = 0; i < header.numBases; i++) {  
978                                 char temp[1];
979                                 in.read(temp, 1);
980                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
981                         }
982         
983                         /* Pad to 8 chars */
984                         spotInFile = in.tellg();
985                         spot = (spotInFile + 7)& ~7;
986                         in.seekg(spot);
987             
988             if (split > 1) {
989                 char * mybuffer;
990                 mybuffer = new char [spot-startSpotInFile];
991                 ifstream in2;
992                 m->openInputFile(currentFileName, in2);
993                 in2.seekg(startSpotInFile);
994                 in2.read(mybuffer,spot-startSpotInFile);
995                 in2.close();
996                 
997                 int barcodeIndex, primerIndex;
998                 int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);
999                                 
1000                 if(trashCodeLength == 0){
1001                     ofstream out;
1002                     m->openOutputFileAppend(filehandles[barcodeIndex][primerIndex], out);
1003                     out.write(mybuffer, in2.gcount()); 
1004                     out.close();
1005                     delete[] mybuffer;
1006                     numSplitReads[barcodeIndex][primerIndex]++;
1007                                 }
1008                                 else{
1009                                         ofstream out;
1010                     m->openOutputFileAppend(noMatchFile, out);
1011                     out.write(mybuffer, in2.gcount()); 
1012                     out.close();
1013                     delete[] mybuffer;
1014                                 }
1015                                 
1016                         }
1017                 }else{
1018                         m->mothurOut("Error reading."); m->mothurOutEndLine();
1019                 }
1020
1021                 return 0;
1022         }
1023         catch(exception& e) {
1024                 m->errorOut(e, "SffInfoCommand", "readSeqData");
1025                 exit(1);
1026         }
1027 }
1028 //**********************************************************************************************************************
1029 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {
1030         try {
1031         //find group read belongs to
1032         TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
1033         
1034         int success = 1;
1035         string trashCode = "";
1036         int currentSeqsDiffs = 0;
1037         
1038         string seq = read.bases;
1039         
1040         if (trim) {
1041             if(header.clipQualRight < header.clipQualLeft){
1042                 seq = "NNNN";
1043             }
1044             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
1045                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
1046             }
1047             else {
1048                 seq = seq.substr(header.clipQualLeft-1);
1049             }
1050         }else{
1051             //if you wanted the sfftxt then you already converted the bases to the right case
1052             if (!sfftxt) {
1053                 //make the bases you want to clip lowercase and the bases you want to keep upper case
1054                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
1055                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
1056                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
1057                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
1058             }
1059         }
1060         
1061         Sequence currSeq(header.name, seq);
1062         QualityScores currQual;
1063         
1064         if(numLinkers != 0){
1065             success = trimOligos.stripLinker(currSeq, currQual);
1066             if(success > ldiffs)                {       trashCode += 'k';       }
1067             else{ currentSeqsDiffs += success;  }
1068             
1069         }
1070         
1071         if(barcodes.size() != 0){
1072             success = trimOligos.stripBarcode(currSeq, currQual, barcode);
1073             if(success > bdiffs)                {       trashCode += 'b';       }
1074             else{ currentSeqsDiffs += success;  }
1075         }
1076         
1077         if(numSpacers != 0){
1078             success = trimOligos.stripSpacer(currSeq, currQual);
1079             if(success > sdiffs)                {       trashCode += 's';       }
1080             else{ currentSeqsDiffs += success;  }
1081             
1082         }
1083         
1084         if(numFPrimers != 0){
1085             success = trimOligos.stripForward(currSeq, currQual, primer, true);
1086             if(success > pdiffs)                {       trashCode += 'f';       }
1087             else{ currentSeqsDiffs += success;  }
1088         }
1089         
1090         if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
1091         
1092         if(revPrimer.size() != 0){
1093             success = trimOligos.stripReverse(currSeq, currQual);
1094             if(!success)                                {       trashCode += 'r';       }
1095         }
1096
1097         
1098         return trashCode.length();
1099     }
1100         catch(exception& e) {
1101                 m->errorOut(e, "SffInfoCommand", "findGroup");
1102                 exit(1);
1103         }
1104 }     
1105 //**********************************************************************************************************************
1106 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
1107         try {
1108                 
1109                 if (name.length() >= 6) {
1110                         string time = name.substr(0, 6);
1111                         unsigned int timeNum = m->fromBase36(time);
1112                         
1113                         int q1 = timeNum / 60;
1114                         int sec = timeNum - 60 * q1;
1115                         int q2 = q1 / 60;
1116                         int minute = q1 - 60 * q2;
1117                         int q3 = q2 / 24;
1118                         int hr = q2 - 24 * q3;
1119                         int q4 = q3 / 32;
1120                         int day = q3 - 32 * q4;
1121                         int q5 = q4 / 13;
1122                         int mon = q4 - 13 * q5;
1123                         int year = 2000 + q5;
1124                 
1125                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
1126                 }
1127                 
1128                 if (name.length() >= 9) {
1129                         region = name.substr(7, 2);
1130                 
1131                         string xyNum = name.substr(9);
1132                         unsigned int myXy = m->fromBase36(xyNum);
1133                         int x = myXy >> 12;
1134                         int y = myXy & 4095;
1135                 
1136                         xy = toString(x) + "_" + toString(y);
1137                 }
1138                 
1139                 return 0;
1140         }
1141         catch(exception& e) {
1142                 m->errorOut(e, "SffInfoCommand", "decodeName");
1143                 exit(1);
1144         }
1145 }
1146 //**********************************************************************************************************************
1147 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
1148         try {
1149         
1150                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
1151                 out << "Version: " << header.version << endl;
1152                 out << "Index Offset: " << header.indexOffset << endl;
1153                 out << "Index Length: " << header.indexLength << endl;
1154                 out << "Number of Reads: " << header.numReads << endl;
1155                 out << "Header Length: " << header.headerLength << endl;
1156                 out << "Key Length: " << header.keyLength << endl;
1157                 out << "Number of Flows: " << header.numFlowsPerRead << endl;
1158                 out << "Format Code: " << header.flogramFormatCode << endl;
1159                 out << "Flow Chars: " << header.flowChars << endl;
1160                 out << "Key Sequence: " << header.keySequence << endl << endl;
1161                         
1162                 return 0;
1163         }
1164         catch(exception& e) {
1165                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");
1166                 exit(1);
1167         }
1168 }
1169 //**********************************************************************************************************************
1170 int SffInfoCommand::printHeader(ofstream& out, Header& header) {
1171         try {
1172                 
1173                 out << ">" << header.name << endl;
1174                 out << "Run Prefix: " << header.timestamp << endl;
1175                 out << "Region #:  " << header.region << endl;
1176                 out << "XY Location: " << header.xy << endl << endl;
1177                 
1178                 out << "Run Name:  " << endl;
1179                 out << "Analysis Name:  " << endl;
1180                 out << "Full Path: " << endl << endl;
1181                 
1182                 out << "Read Header Len: " << header.headerLength << endl;
1183                 out << "Name Length: " << header.nameLength << endl;
1184                 out << "# of Bases: " << header.numBases << endl;
1185                 out << "Clip Qual Left: " << header.clipQualLeft << endl;
1186                 out << "Clip Qual Right: " << header.clipQualRight << endl;
1187                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
1188                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
1189                 
1190                 return 0;
1191         }
1192         catch(exception& e) {
1193                 m->errorOut(e, "SffInfoCommand", "printHeader");
1194                 exit(1);
1195         }
1196 }
1197 //**********************************************************************************************************************
1198 bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {
1199         try {
1200         bool okay = true;
1201         string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";
1202         
1203         if (header.clipQualLeft > read.bases.length()) {
1204             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
1205         }
1206         if (header.clipQualRight > read.bases.length()) {
1207             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
1208         }
1209         if (header.clipQualLeft > read.qualScores.size()) {
1210             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
1211         }
1212         if (header.clipQualRight > read.qualScores.size()) {
1213             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
1214         }
1215         
1216         if (okay == false) {
1217             m->mothurOut(message); m->mothurOutEndLine();
1218         }
1219         
1220                 return okay;
1221         }
1222         catch(exception& e) {
1223                 m->errorOut(e, "SffInfoCommand", "sanityCheck");
1224                 exit(1);
1225         }
1226 }
1227 //**********************************************************************************************************************
1228 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
1229         try {
1230                 out << "Flowgram: ";
1231                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
1232                 
1233                 out << endl <<  "Flow Indexes: ";
1234                 int sum = 0;
1235                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
1236                 
1237                 //make the bases you want to clip lowercase and the bases you want to keep upper case
1238                 if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
1239                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
1240                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
1241                 for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
1242                 
1243                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
1244                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
1245         
1246                 
1247                 out << endl << endl;
1248                 
1249                 return 0;
1250         }
1251         catch(exception& e) {
1252                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
1253                 exit(1);
1254         }
1255 }
1256 //**********************************************************************************************************************
1257 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
1258         try {
1259                 string seq = read.bases;
1260                 
1261         if (trim) {
1262                         if(header.clipQualRight < header.clipQualLeft){
1263                                 seq = "NNNN";
1264                         }
1265                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
1266                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
1267                         }
1268                         else {
1269                                 seq = seq.substr(header.clipQualLeft-1);
1270                         }
1271                 }else{
1272                         //if you wanted the sfftxt then you already converted the bases to the right case
1273                         if (!sfftxt) {
1274                                 //make the bases you want to clip lowercase and the bases you want to keep upper case
1275                                 if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
1276                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
1277                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
1278                                 for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
1279                         }
1280                 }
1281                 
1282                 out << ">" << header.name  << " xy=" << header.xy << endl;
1283                 out << seq << endl;
1284                 
1285                 return 0;
1286         }
1287         catch(exception& e) {
1288                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
1289                 exit(1);
1290         }
1291 }
1292
1293 //**********************************************************************************************************************
1294 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
1295         try {
1296                 
1297                 if (trim) {
1298                         if(header.clipQualRight < header.clipQualLeft){
1299                                 out << ">" << header.name << " xy=" << header.xy << endl;
1300                                 out << "0\t0\t0\t0";
1301                         }
1302                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
1303                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
1304                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
1305                         }
1306                         else{
1307                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
1308                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
1309                         }
1310                 }else{
1311                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
1312                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
1313                 }
1314                 
1315                 out << endl;
1316                 
1317                 return 0;
1318         }
1319         catch(exception& e) {
1320                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");
1321                 exit(1);
1322         }
1323 }
1324
1325 //**********************************************************************************************************************
1326 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
1327         try {
1328                 if(header.clipQualRight > header.clipQualLeft){
1329                         
1330                         int rightIndex = 0;
1331                         for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
1332
1333                         out << header.name << ' ' << rightIndex;
1334                         for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
1335                         out << endl;
1336                 }
1337                 
1338                 
1339                 return 0;
1340         }
1341         catch(exception& e) {
1342                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
1343                 exit(1);
1344         }
1345 }
1346 //**********************************************************************************************************************
1347 int SffInfoCommand::readAccnosFile(string filename) {
1348         try {
1349                 //remove old names
1350                 seqNames.clear();
1351                 
1352                 ifstream in;
1353                 m->openInputFile(filename, in);
1354                 string name;
1355                 
1356                 while(!in.eof()){
1357                         in >> name; m->gobble(in);
1358                                                 
1359                         seqNames.insert(name);
1360                         
1361                         if (m->control_pressed) { seqNames.clear(); break; }
1362                 }
1363                 in.close();             
1364                 
1365                 return 0;
1366         }
1367         catch(exception& e) {
1368                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");
1369                 exit(1);
1370         }
1371 }
1372 //**********************************************************************************************************************
1373 int SffInfoCommand::parseSffTxt() {
1374         try {
1375                 
1376                 ifstream inSFF;
1377                 m->openInputFile(sfftxtFilename, inSFF);
1378                 
1379                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
1380                 
1381                 //output file names
1382                 ofstream outFasta, outQual, outFlow;
1383                 string outFastaFileName, outQualFileName;
1384                 string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));
1385                 if (fileRoot.length() > 0) {
1386                         //rip off last .
1387                         fileRoot = fileRoot.substr(0, fileRoot.length()-1);
1388                         fileRoot = m->getRootName(fileRoot);
1389                 }
1390                 
1391         map<string, string> variables; 
1392                 variables["[filename]"] = fileRoot;
1393                 string sfftxtFileName = getOutputFileName("sfftxt",variables);
1394                 string outFlowFileName = getOutputFileName("flow",variables);
1395                 if (!trim) { variables["[tag]"] = "raw"; }
1396                 outFastaFileName = getOutputFileName("fasta",variables);
1397         outQualFileName = getOutputFileName("qfile",variables);
1398                 
1399                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
1400                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
1401                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
1402                 
1403                 //read common header
1404                 string commonHeader = m->getline(inSFF);
1405                 string magicNumber = m->getline(inSFF); 
1406                 string version = m->getline(inSFF);
1407                 string indexOffset = m->getline(inSFF);
1408                 string indexLength = m->getline(inSFF);
1409                 int numReads = parseHeaderLineToInt(inSFF);
1410                 string headerLength = m->getline(inSFF);
1411                 string keyLength = m->getline(inSFF);
1412                 int numFlows = parseHeaderLineToInt(inSFF);
1413                 string flowgramCode = m->getline(inSFF);
1414                 string flowChars = m->getline(inSFF);
1415                 string keySequence = m->getline(inSFF);
1416                 m->gobble(inSFF);
1417                 
1418                 string seqName;
1419                 
1420                 if (flow)       {       outFlow << numFlows << endl;    }
1421                 
1422                 for(int i=0;i<numReads;i++){
1423                         
1424                         //sanity check
1425                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
1426                         
1427                         Header header;
1428                         
1429                         //parse read header
1430                         inSFF >> seqName;
1431                         seqName = seqName.substr(1);
1432                         m->gobble(inSFF);
1433                         header.name = seqName;
1434                         
1435                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
1436                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
1437                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
1438                         m->gobble(inSFF);
1439                                 
1440                         string runName = parseHeaderLineToString(inSFF);
1441                         string analysisName = parseHeaderLineToString(inSFF);
1442                         string fullPath = parseHeaderLineToString(inSFF);
1443                         m->gobble(inSFF);
1444                         
1445                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
1446                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
1447                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
1448                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
1449                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
1450                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
1451                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
1452                         m->gobble(inSFF);
1453                                 
1454                         seqRead read;
1455                         
1456                         //parse read
1457                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
1458                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
1459                         
1460                         //adjust for print
1461                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
1462                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
1463                         read.flowIndex = flowIndicesAdjusted;
1464                         
1465                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
1466                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
1467                         m->gobble(inSFF);
1468                                         
1469                         //if you have provided an accosfile and this seq is not in it, then dont print
1470                         bool print = true;
1471                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
1472                         
1473                         //print 
1474                         if (print) {
1475                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
1476                                 if (qual)       {       printQualSeqData(outQual, read, header);        }
1477                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }
1478                         }
1479                         
1480                         //report progress
1481                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
1482                         
1483                         if (m->control_pressed) {  break;  }
1484                 }
1485                 
1486                 //report progress
1487                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
1488                 
1489                 inSFF.close();
1490                 
1491                 if (fasta)      {  outFasta.close();    }
1492                 if (qual)       {  outQual.close();             }
1493                 if (flow)       {  outFlow.close();             }
1494                 
1495                 return 0;
1496         }
1497         catch(exception& e) {
1498                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");
1499                 exit(1);
1500         }
1501 }
1502 //**********************************************************************************************************************
1503
1504 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
1505         try {
1506                 int number;
1507                 
1508                 while (!file.eof())     {
1509                         
1510                         char c = file.get(); 
1511                         if (c == ':'){
1512                                 file >> number;
1513                                 break;
1514                         }
1515                         
1516                 }
1517                 m->gobble(file);
1518                 return number;
1519         }
1520         catch(exception& e) {
1521                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
1522                 exit(1);
1523         }
1524         
1525 }
1526
1527 //**********************************************************************************************************************
1528
1529 string SffInfoCommand::parseHeaderLineToString(ifstream& file){
1530         try {
1531                 string text;
1532                 
1533                 while (!file.eof())     {
1534                         char c = file.get(); 
1535                         
1536                         if (c == ':'){
1537                                 //m->gobble(file);
1538                                 //text = m->getline(file);      
1539                                 file >> text;
1540                                 break;
1541                         }
1542                 }
1543                 m->gobble(file);
1544                 
1545                 return text;
1546         }
1547         catch(exception& e) {
1548                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
1549                 exit(1);
1550         }
1551 }
1552
1553 //**********************************************************************************************************************
1554
1555 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
1556         try {
1557                 vector<unsigned short> floatVector(length);
1558                 
1559                 while (!file.eof())     {
1560                         char c = file.get(); 
1561                         if (c == ':'){
1562                                 float temp;
1563                                 for(int i=0;i<length;i++){
1564                                         file >> temp;
1565                                         floatVector[i] = temp * 100;
1566                                 }
1567                                 break;
1568                         }
1569                 }
1570                 m->gobble(file);        
1571                 return floatVector;
1572         }
1573         catch(exception& e) {
1574                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
1575                 exit(1);
1576         }
1577 }
1578
1579 //**********************************************************************************************************************
1580
1581 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
1582         try {
1583                 vector<unsigned int> intVector(length);
1584                 
1585                 while (!file.eof())     {
1586                         char c = file.get(); 
1587                         if (c == ':'){
1588                                 for(int i=0;i<length;i++){
1589                                         file >> intVector[i];
1590                                 }
1591                                 break;
1592                         }
1593                 }
1594                 m->gobble(file);        
1595                 return intVector;
1596         }
1597         catch(exception& e) {
1598                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
1599                 exit(1);
1600         }
1601 }
1602 //***************************************************************************************************************
1603
1604 bool SffInfoCommand::readOligos(string oligoFile){
1605         try {
1606         filehandles.clear();
1607         numSplitReads.clear();
1608         filehandlesHeaders.clear();
1609         
1610                 ifstream inOligos;
1611                 m->openInputFile(oligoFile, inOligos);
1612                 
1613                 string type, oligo, group;
1614         
1615                 int indexPrimer = 0;
1616                 int indexBarcode = 0;
1617                 
1618                 while(!inOligos.eof()){
1619             
1620                         inOligos >> type; 
1621             
1622                         if(type[0] == '#'){
1623                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1624                                 m->gobble(inOligos);
1625                         }
1626                         else{
1627                                 m->gobble(inOligos);
1628                                 //make type case insensitive
1629                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1630                                 
1631                                 inOligos >> oligo;
1632                                 
1633                                 for(int i=0;i<oligo.length();i++){
1634                                         oligo[i] = toupper(oligo[i]);
1635                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1636                                 }
1637                                 
1638                                 if(type == "FORWARD"){
1639                                         group = "";
1640                                         
1641                                         // get rest of line in case there is a primer name
1642                                         while (!inOligos.eof()) {       
1643                                                 char c = inOligos.get(); 
1644                                                 if (c == 10 || c == 13){        break;  }
1645                                                 else if (c == 32 || c == 9){;} //space or tab
1646                                                 else {  group += c;  }
1647                                         } 
1648                                         
1649                                         //check for repeat barcodes
1650                                         map<string, int>::iterator itPrime = primers.find(oligo);
1651                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1652                                         
1653                                         primers[oligo]=indexPrimer; indexPrimer++;              
1654                                         primerNameVector.push_back(group);
1655                                 }else if(type == "REVERSE"){
1656                                         //Sequence oligoRC("reverse", oligo);
1657                                         //oligoRC.reverseComplement();
1658                     string oligoRC = reverseOligo(oligo);
1659                                         revPrimer.push_back(oligoRC);
1660                                 }
1661                                 else if(type == "BARCODE"){
1662                                         inOligos >> group;
1663                                         
1664                                         //check for repeat barcodes
1665                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1666                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1667                     
1668                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1669                                         barcodeNameVector.push_back(group);
1670                                 }else if(type == "LINKER"){
1671                                         linker.push_back(oligo);
1672                                 }else if(type == "SPACER"){
1673                                         spacer.push_back(oligo);
1674                                 }
1675                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1676                         }
1677                         m->gobble(inOligos);
1678                 }       
1679                 inOligos.close();
1680                 
1681                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }
1682                 
1683                 //add in potential combos
1684                 if(barcodeNameVector.size() == 0){
1685                         barcodes[""] = 0;
1686                         barcodeNameVector.push_back("");                        
1687                 }
1688                 
1689                 if(primerNameVector.size() == 0){
1690                         primers[""] = 0;
1691                         primerNameVector.push_back("");                 
1692                 }
1693                 
1694                 filehandles.resize(barcodeNameVector.size());
1695                 for(int i=0;i<filehandles.size();i++){
1696                         filehandles[i].assign(primerNameVector.size(), "");
1697                 }
1698                         
1699                 if(split > 1){
1700                         set<string> uniqueNames; //used to cleanup outputFileNames
1701                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1702                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1703                                         
1704                                         string primerName = primerNameVector[itPrimer->second];
1705                                         string barcodeName = barcodeNameVector[itBar->second];
1706                                         
1707                                         string comboGroupName = "";
1708                                         string fastaFileName = "";
1709                                         string qualFileName = "";
1710                                         string nameFileName = "";
1711                                         
1712                                         if(primerName == ""){
1713                                                 comboGroupName = barcodeNameVector[itBar->second];
1714                                         }
1715                                         else{
1716                                                 if(barcodeName == ""){
1717                                                         comboGroupName = primerNameVector[itPrimer->second];
1718                                                 }
1719                                                 else{
1720                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1721                                                 }
1722                                         }
1723                                         
1724                                         ofstream temp;
1725                     map<string, string> variables; 
1726                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
1727                     variables["[group]"] = comboGroupName;
1728                                         string thisFilename = getOutputFileName("sff",variables);
1729                                         if (uniqueNames.count(thisFilename) == 0) {
1730                                                 outputNames.push_back(thisFilename);
1731                                                 outputTypes["sff"].push_back(thisFilename);
1732                                                 uniqueNames.insert(thisFilename);
1733                                         }
1734                                         
1735                                         filehandles[itBar->second][itPrimer->second] = thisFilename;
1736                                         m->openOutputFile(thisFilename, temp);          temp.close();
1737                                 }
1738                         }
1739                 }
1740                 numFPrimers = primers.size();
1741         numLinkers = linker.size();
1742         numSpacers = spacer.size();
1743         map<string, string> variables; 
1744         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
1745         variables["[group]"] = "scrap";
1746                 noMatchFile = getOutputFileName("sff",variables);
1747         m->mothurRemove(noMatchFile);
1748         
1749                 bool allBlank = true;
1750                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1751                         if (barcodeNameVector[i] != "") {
1752                                 allBlank = false;
1753                                 break;
1754                         }
1755                 }
1756                 for (int i = 0; i < primerNameVector.size(); i++) {
1757                         if (primerNameVector[i] != "") {
1758                                 allBlank = false;
1759                                 break;
1760                         }
1761                 }
1762                 
1763         filehandlesHeaders.resize(filehandles.size());
1764         numSplitReads.resize(filehandles.size());
1765         for (int i = 0; i < filehandles.size(); i++) { 
1766             numSplitReads[i].resize(filehandles[i].size(), 0); 
1767             for (int j = 0; j < filehandles[i].size(); j++) {
1768                 filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");
1769             }
1770         }
1771                              
1772                 if (allBlank) {
1773                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();
1774                         split = 1;
1775                         return false;
1776                 }
1777                 
1778                 return true;
1779                 
1780         }
1781         catch(exception& e) {
1782                 m->errorOut(e, "SffInfoCommand", "readOligos");
1783                 exit(1);
1784         }
1785 }
1786 //********************************************************************/
1787 string SffInfoCommand::reverseOligo(string oligo){
1788         try {
1789         string reverse = "";
1790         
1791         for(int i=oligo.length()-1;i>=0;i--){
1792             
1793             if(oligo[i] == 'A')         {       reverse += 'T'; }
1794             else if(oligo[i] == 'T'){   reverse += 'A'; }
1795             else if(oligo[i] == 'U'){   reverse += 'A'; }
1796             
1797             else if(oligo[i] == 'G'){   reverse += 'C'; }
1798             else if(oligo[i] == 'C'){   reverse += 'G'; }
1799             
1800             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1801             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1802             
1803             else if(oligo[i] == 'M'){   reverse += 'K'; }
1804             else if(oligo[i] == 'K'){   reverse += 'M'; }
1805             
1806             else if(oligo[i] == 'W'){   reverse += 'W'; }
1807             else if(oligo[i] == 'S'){   reverse += 'S'; }
1808             
1809             else if(oligo[i] == 'B'){   reverse += 'V'; }
1810             else if(oligo[i] == 'V'){   reverse += 'B'; }
1811             
1812             else if(oligo[i] == 'D'){   reverse += 'H'; }
1813             else if(oligo[i] == 'H'){   reverse += 'D'; }
1814             
1815             else                                                {       reverse += 'N'; }
1816         }
1817         
1818         
1819         return reverse;
1820     }
1821         catch(exception& e) {
1822                 m->errorOut(e, "SffInfoCommand", "reverseOligo");
1823                 exit(1);
1824         }
1825 }
1826
1827 //**********************************************************************************************************************
1828
1829
1830                                 
1831