]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
fixes while testing 1.33.0
[mothur.git] / sffinfocommand.cpp
1 /*\r
2  *  sffinfocommand.cpp\r
3  *  Mothur\r
4  *\r
5  *  Created by westcott on 7/7/10.\r
6  *  Copyright 2010 Schloss Lab. All rights reserved.\r
7  *\r
8  */\r
9 \r
10 #include "sffinfocommand.h"\r
11 #include "endiannessmacros.h"\r
12 #include "trimoligos.h"\r
13 #include "sequence.hpp"\r
14 #include "qualityscores.h"\r
15 \r
16 //**********************************************************************************************************************\r
17 vector<string> SffInfoCommand::setParameters(){ \r
18         try {           \r
19                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
20         CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
21         CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
22                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
23                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
24                 CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
25                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);\r
26                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);\r
27                 CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);\r
28         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);\r
29                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);\r
30         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);\r
31                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);\r
32         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);\r
33                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);\r
34                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);\r
35                 \r
36                 vector<string> myArray;\r
37                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }\r
38                 return myArray;\r
39         }\r
40         catch(exception& e) {\r
41                 m->errorOut(e, "SffInfoCommand", "setParameters");\r
42                 exit(1);\r
43         }\r
44 }\r
45 //**********************************************************************************************************************\r
46 string SffInfoCommand::getHelpString(){ \r
47         try {\r
48                 string helpString = "";\r
49                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
50                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
51                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
52                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
53                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
54         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
55         helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n";\r
56         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
57                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
58                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
59         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
60                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
61                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";\r
62                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";\r
63                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
64                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";\r
65                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";\r
66                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";\r
67                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";\r
68                 return helpString;\r
69         }\r
70         catch(exception& e) {\r
71                 m->errorOut(e, "SffInfoCommand", "getHelpString");\r
72                 exit(1);\r
73         }\r
74 }\r
75 \r
76 //**********************************************************************************************************************\r
77 string SffInfoCommand::getOutputPattern(string type) {\r
78     try {\r
79         string pattern = "";\r
80         \r
81         if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }\r
82         else if (type == "flow")    {   pattern =  "[filename],flow";   }\r
83         else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }\r
84         else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }\r
85         else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }\r
86         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }\r
87         \r
88         return pattern;\r
89     }\r
90     catch(exception& e) {\r
91         m->errorOut(e, "SffInfoCommand", "getOutputPattern");\r
92         exit(1);\r
93     }\r
94 }\r
95 //**********************************************************************************************************************\r
96 SffInfoCommand::SffInfoCommand(){       \r
97         try {\r
98                 abort = true; calledHelp = true; \r
99                 setParameters();\r
100                 vector<string> tempOutNames;\r
101                 outputTypes["fasta"] = tempOutNames;\r
102                 outputTypes["flow"] = tempOutNames;\r
103                 outputTypes["sfftxt"] = tempOutNames;\r
104                 outputTypes["qfile"] = tempOutNames;\r
105         outputTypes["sff"] = tempOutNames;\r
106         }\r
107         catch(exception& e) {\r
108                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
109                 exit(1);\r
110         }\r
111 }\r
112 //**********************************************************************************************************************\r
113 \r
114 SffInfoCommand::SffInfoCommand(string option)  {\r
115         try {\r
116                 abort = false; calledHelp = false;   \r
117                 hasAccnos = false; hasOligos = false; hasGroup = false;\r
118         split = 1;\r
119                 \r
120                 //allow user to run help\r
121                 if(option == "help") { help(); abort = true; calledHelp = true; }\r
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}\r
123                 \r
124                 else {\r
125                         //valid paramters for this command\r
126                         vector<string> myArray = setParameters();\r
127                         \r
128                         OptionParser parser(option);\r
129                         map<string, string> parameters = parser.getParameters();\r
130                         \r
131                         ValidParameters validParameter;\r
132                         //check to make sure all parameters are valid for command\r
133                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { \r
134                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }\r
135                         }\r
136                         \r
137                         //initialize outputTypes\r
138                         vector<string> tempOutNames;\r
139                         outputTypes["fasta"] = tempOutNames;\r
140                         outputTypes["flow"] = tempOutNames;\r
141                         outputTypes["sfftxt"] = tempOutNames;\r
142                         outputTypes["qfile"] = tempOutNames;\r
143             outputTypes["sff"] = tempOutNames;\r
144                         \r
145                         //if the user changes the output directory command factory will send this info to us in the output parameter \r
146                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }\r
147                         \r
148                         //if the user changes the input directory command factory will send this info to us in the output parameter \r
149                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }\r
150 \r
151                         sffFilename = validParameter.validFile(parameters, "sff", false);\r
152                         if (sffFilename == "not found") { sffFilename = "";  }\r
153                         else { \r
154                                 m->splitAtDash(sffFilename, filenames);\r
155                                 \r
156                                 //go through files and make sure they are good, if not, then disregard them\r
157                                 for (int i = 0; i < filenames.size(); i++) {\r
158                                         bool ignore = false;\r
159                                         if (filenames[i] == "current") { \r
160                                                 filenames[i] = m->getSFFFile(); \r
161                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }\r
162                                                 else {  \r
163                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
164                                                         //erase from file list\r
165                                                         filenames.erase(filenames.begin()+i);\r
166                                                         i--;\r
167                                                 }\r
168                                         }\r
169                                         \r
170                                         if (!ignore) {\r
171                                                 if (inputDir != "") {\r
172                                                         string path = m->hasPath(filenames[i]);\r
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
174                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }\r
175                                                 }\r
176                 \r
177                                                 ifstream in;\r
178                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");\r
179                                         \r
180                                                 //if you can't open it, try default location\r
181                                                 if (ableToOpen == 1) {\r
182                                                         if (m->getDefaultPath() != "") { //default path is set\r
183                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);\r
184                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
185                                                                 ifstream in2;\r
186                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
187                                                                 in2.close();\r
188                                                                 filenames[i] = tryPath;\r
189                                                         }\r
190                                                 }\r
191                                                 \r
192                                                 //if you can't open it, try default location\r
193                                                 if (ableToOpen == 1) {\r
194                                                         if (m->getOutputDir() != "") { //default path is set\r
195                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);\r
196                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
197                                                                 ifstream in2;\r
198                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
199                                                                 in2.close();\r
200                                                                 filenames[i] = tryPath;\r
201                                                         }\r
202                                                 }\r
203                                                 \r
204                                                 in.close();\r
205                                                 \r
206                                                 if (ableToOpen == 1) { \r
207                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
208                                                         //erase from file list\r
209                                                         filenames.erase(filenames.begin()+i);\r
210                                                         i--;\r
211                                                 }else { m->setSFFFile(filenames[i]); }\r
212                                         }\r
213                                 }\r
214                                 \r
215                                 //make sure there is at least one valid file left\r
216                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
217                         }\r
218                         \r
219                         accnosName = validParameter.validFile(parameters, "accnos", false);\r
220                         if (accnosName == "not found") { accnosName = "";  }\r
221                         else { \r
222                                 hasAccnos = true;\r
223                                 m->splitAtDash(accnosName, accnosFileNames);\r
224                                 \r
225                                 //go through files and make sure they are good, if not, then disregard them\r
226                                 for (int i = 0; i < accnosFileNames.size(); i++) {\r
227                                         bool ignore = false;\r
228                                         if (accnosFileNames[i] == "current") { \r
229                                                 accnosFileNames[i] = m->getAccnosFile(); \r
230                                                 if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
231                                                 else {  \r
232                                                         m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
233                                                         //erase from file list\r
234                                                         accnosFileNames.erase(accnosFileNames.begin()+i);\r
235                                                         i--;\r
236                                                 }\r
237                                         }\r
238                                         \r
239                                         if (!ignore) {\r
240                                         \r
241                                                 if (inputDir != "") {\r
242                                                         string path = m->hasPath(accnosFileNames[i]);\r
243                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
244                                                         if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }\r
245                                                 }\r
246                 \r
247                                                 ifstream in;\r
248                                                 int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");\r
249                                         \r
250                                                 //if you can't open it, try default location\r
251                                                 if (ableToOpen == 1) {\r
252                                                         if (m->getDefaultPath() != "") { //default path is set\r
253                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);\r
254                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
255                                                                 ifstream in2;\r
256                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
257                                                                 in2.close();\r
258                                                                 accnosFileNames[i] = tryPath;\r
259                                                         }\r
260                                                 }\r
261                                                 //if you can't open it, try default location\r
262                                                 if (ableToOpen == 1) {\r
263                                                         if (m->getOutputDir() != "") { //default path is set\r
264                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);\r
265                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
266                                                                 ifstream in2;\r
267                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
268                                                                 in2.close();\r
269                                                                 accnosFileNames[i] = tryPath;\r
270                                                         }\r
271                                                 }\r
272                                                 in.close();\r
273                                                 \r
274                                                 if (ableToOpen == 1) { \r
275                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
276                                                         //erase from file list\r
277                                                         accnosFileNames.erase(accnosFileNames.begin()+i);\r
278                                                         i--;\r
279                                                 }\r
280                                         }\r
281                                 }\r
282                                 \r
283                                 //make sure there is at least one valid file left\r
284                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
285                         }\r
286             \r
287             oligosfile = validParameter.validFile(parameters, "oligos", false);\r
288                         if (oligosfile == "not found") { oligosfile = "";  }\r
289                         else { \r
290                                 hasOligos = true;\r
291                                 m->splitAtDash(oligosfile, oligosFileNames);\r
292                                 \r
293                                 //go through files and make sure they are good, if not, then disregard them\r
294                                 for (int i = 0; i < oligosFileNames.size(); i++) {\r
295                                         bool ignore = false;\r
296                                         if (oligosFileNames[i] == "current") { \r
297                                                 oligosFileNames[i] = m->getOligosFile(); \r
298                                                 if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
299                                                 else {  \r
300                                                         m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
301                                                         //erase from file list\r
302                                                         oligosFileNames.erase(oligosFileNames.begin()+i);\r
303                                                         i--;\r
304                                                 }\r
305                                         }\r
306                                         \r
307                                         if (!ignore) {\r
308                         \r
309                                                 if (inputDir != "") {\r
310                                                         string path = m->hasPath(oligosFileNames[i]);\r
311                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
312                                                         if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }\r
313                                                 }\r
314                         \r
315                                                 ifstream in;\r
316                                                 int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");\r
317                         \r
318                                                 //if you can't open it, try default location\r
319                                                 if (ableToOpen == 1) {\r
320                                                         if (m->getDefaultPath() != "") { //default path is set\r
321                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);\r
322                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
323                                                                 ifstream in2;\r
324                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
325                                                                 in2.close();\r
326                                                                 oligosFileNames[i] = tryPath;\r
327                                                         }\r
328                                                 }\r
329                                                 //if you can't open it, try default location\r
330                                                 if (ableToOpen == 1) {\r
331                                                         if (m->getOutputDir() != "") { //default path is set\r
332                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);\r
333                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
334                                                                 ifstream in2;\r
335                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
336                                                                 in2.close();\r
337                                                                 oligosFileNames[i] = tryPath;\r
338                                                         }\r
339                                                 }\r
340                                                 in.close();\r
341                                                 \r
342                                                 if (ableToOpen == 1) { \r
343                                                         m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
344                                                         //erase from file list\r
345                                                         oligosFileNames.erase(oligosFileNames.begin()+i);\r
346                                                         i--;\r
347                                                 }\r
348                                         }\r
349                                 }\r
350                                 \r
351                                 //make sure there is at least one valid file left\r
352                                 if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
353                         }\r
354             \r
355             groupfile = validParameter.validFile(parameters, "group", false);\r
356                         if (groupfile == "not found") { groupfile = "";  }\r
357                         else {\r
358                                 hasGroup = true;\r
359                                 m->splitAtDash(groupfile, groupFileNames);\r
360                                 \r
361                                 //go through files and make sure they are good, if not, then disregard them\r
362                                 for (int i = 0; i < groupFileNames.size(); i++) {\r
363                                         bool ignore = false;\r
364                                         if (groupFileNames[i] == "current") {\r
365                                                 groupFileNames[i] = m->getGroupFile();\r
366                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
367                                                 else {\r
368                                                         m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
369                                                         //erase from file list\r
370                                                         groupFileNames.erase(groupFileNames.begin()+i);\r
371                                                         i--;\r
372                                                 }\r
373                                         }\r
374                                         \r
375                                         if (!ignore) {\r
376                         \r
377                                                 if (inputDir != "") {\r
378                                                         string path = m->hasPath(groupFileNames[i]);\r
379                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
380                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }\r
381                                                 }\r
382                         \r
383                                                 ifstream in;\r
384                                                 int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
385                         \r
386                                                 //if you can't open it, try default location\r
387                                                 if (ableToOpen == 1) {\r
388                                                         if (m->getDefaultPath() != "") { //default path is set\r
389                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
390                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
391                                                                 ifstream in2;\r
392                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
393                                                                 in2.close();\r
394                                                                 groupFileNames[i] = tryPath;\r
395                                                         }\r
396                                                 }\r
397                                                 //if you can't open it, try default location\r
398                                                 if (ableToOpen == 1) {\r
399                                                         if (m->getOutputDir() != "") { //default path is set\r
400                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
401                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
402                                                                 ifstream in2;\r
403                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
404                                                                 in2.close();\r
405                                                                 groupFileNames[i] = tryPath;\r
406                                                         }\r
407                                                 }\r
408                                                 in.close();\r
409                                                 \r
410                                                 if (ableToOpen == 1) {\r
411                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
412                                                         //erase from file list\r
413                                                         groupFileNames.erase(groupFileNames.begin()+i);\r
414                                                         i--;\r
415                                                 }\r
416                                         }\r
417                                 }\r
418                                 \r
419                                 //make sure there is at least one valid file left\r
420                                 if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
421                         }\r
422 \r
423                         if (hasGroup) {\r
424                 split = 2;\r
425                                 if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
426                         }\r
427             \r
428             if (hasOligos) {\r
429                 split = 2;\r
430                                 if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
431                         }\r
432             \r
433             if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
434             \r
435                         if (hasAccnos) {\r
436                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
437                         }\r
438                         \r
439                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }\r
440                         qual = m->isTrue(temp); \r
441                         \r
442                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }\r
443                         fasta = m->isTrue(temp); \r
444                         \r
445                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }\r
446                         flow = m->isTrue(temp); \r
447                         \r
448                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }\r
449                         trim = m->isTrue(temp); \r
450             \r
451             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }\r
452                         m->mothurConvert(temp, bdiffs);\r
453                         \r
454                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }\r
455                         m->mothurConvert(temp, pdiffs);\r
456             \r
457             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }\r
458                         m->mothurConvert(temp, ldiffs);\r
459             \r
460             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }\r
461                         m->mothurConvert(temp, sdiffs);\r
462                         \r
463                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }\r
464                         m->mothurConvert(temp, tdiffs);\r
465                         \r
466                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }\r
467             \r
468                         temp = validParameter.validFile(parameters, "sfftxt", false);                           \r
469                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }\r
470                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }\r
471                         else {\r
472                                 //you are a filename\r
473                                 if (inputDir != "") {\r
474                                         map<string,string>::iterator it = parameters.find("sfftxt");\r
475                                         //user has given a template file\r
476                                         if(it != parameters.end()){ \r
477                                                 string path = m->hasPath(it->second);\r
478                                                 //if the user has not given a path then, add inputdir. else leave path alone.\r
479                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }\r
480                                         }\r
481                                 }\r
482                                 \r
483                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);\r
484                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }\r
485                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }\r
486                         }\r
487                         \r
488                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  \r
489                                 //if there is a current sff file, use it\r
490                                 string filename = m->getSFFFile(); \r
491                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }\r
492                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }\r
493                         }\r
494             \r
495             \r
496                 }\r
497         }\r
498         catch(exception& e) {\r
499                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
500                 exit(1);\r
501         }\r
502 }\r
503 //**********************************************************************************************************************\r
504 int SffInfoCommand::execute(){\r
505         try {\r
506                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }\r
507                 \r
508                 for (int s = 0; s < filenames.size(); s++) {\r
509                         \r
510                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
511                         \r
512                         int start = time(NULL);\r
513                         \r
514             filenames[s] = m->getFullPathName(filenames[s]);\r
515                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();\r
516                         \r
517                         string accnos = "";\r
518                         if (hasAccnos) { accnos = accnosFileNames[s]; }\r
519             \r
520             string oligos = "";\r
521             if (hasOligos) { oligos = oligosFileNames[s]; }\r
522             if (hasGroup) { oligos = groupFileNames[s]; }\r
523             \r
524                         int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
525 \r
526                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
527                 }\r
528                 \r
529                 if (sfftxtFilename != "") {  parseSffTxt(); }\r
530                 \r
531                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
532                 \r
533                 //set fasta file as new current fastafile\r
534                 string current = "";\r
535                 itTypes = outputTypes.find("fasta");\r
536                 if (itTypes != outputTypes.end()) {\r
537                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }\r
538                 }\r
539                 \r
540                 itTypes = outputTypes.find("qfile");\r
541                 if (itTypes != outputTypes.end()) {\r
542                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }\r
543                 }\r
544                 \r
545                 itTypes = outputTypes.find("flow");\r
546                 if (itTypes != outputTypes.end()) {\r
547                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }\r
548                 }\r
549                 \r
550                 //report output filenames\r
551                 m->mothurOutEndLine();\r
552                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();\r
553                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }\r
554                 m->mothurOutEndLine();\r
555 \r
556                 return 0;\r
557         }\r
558         catch(exception& e) {\r
559                 m->errorOut(e, "SffInfoCommand", "execute");\r
560                 exit(1);\r
561         }\r
562 }\r
563 //**********************************************************************************************************************\r
564 int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
565         try {\r
566                 currentFileName = input;\r
567                 if (outputDir == "") {  outputDir += m->hasPath(input); }\r
568                 \r
569                 if (accnos != "")       {  readAccnosFile(accnos);  }\r
570                 else                            {       seqNames.clear();               }\r
571         \r
572         if (hasOligos)   {   readOligos(oligos);    split = 2;      }\r
573         if (hasGroup)    {   readGroup(oligos);     split = 2;      }\r
574         \r
575                 ofstream outSfftxt, outFasta, outQual, outFlow;\r
576                 string outFastaFileName, outQualFileName;\r
577         string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
578         if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }\r
579         \r
580         map<string, string> variables; \r
581                 variables["[filename]"] = rootName;\r
582                 string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
583                 string outFlowFileName = getOutputFileName("flow",variables);\r
584                 if (!trim) { variables["[tag]"] = "raw"; }\r
585                 outFastaFileName = getOutputFileName("fasta",variables);\r
586         outQualFileName = getOutputFileName("qfile",variables);\r
587         \r
588                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }\r
589                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
590                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
591                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
592                 \r
593                 ifstream in;\r
594                 m->openInputFileBinary(input, in);\r
595                 \r
596                 CommonHeader header;\r
597                 readCommonHeader(in, header);\r
598         \r
599                 int count = 0;\r
600                 \r
601                 //check magic number and version\r
602                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
603                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
604         \r
605                 //print common header\r
606                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
607                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }\r
608                 \r
609                 //read through the sff file\r
610                 while (!in.eof()) {\r
611                         \r
612                         bool print = true;\r
613                                                 \r
614                         //read data\r
615                         seqRead read;  Header readheader;\r
616             readSeqData(in, read, header.numFlowsPerRead, readheader);\r
617             \r
618             bool okay = sanityCheck(readheader, read);\r
619             if (!okay) { break; }\r
620             \r
621                         //if you have provided an accosfile and this seq is not in it, then dont print\r
622                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }\r
623                         \r
624                         //print \r
625                         if (print) {\r
626                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }\r
627                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }\r
628                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }\r
629                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }\r
630                         }\r
631                         \r
632                         count++;\r
633             \r
634                         //report progress\r
635                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }\r
636                 \r
637                         if (m->control_pressed) { count = 0; break;   }\r
638                         \r
639                         if (count >= header.numReads) { break; }\r
640                 }\r
641                 \r
642                 //report progress\r
643                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }\r
644                 \r
645                 in.close();\r
646                 \r
647                 if (sfftxt) {  outSfftxt.close();       }\r
648                 if (fasta)      {  outFasta.close();    }\r
649                 if (qual)       {  outQual.close();             }\r
650                 if (flow)       {  outFlow.close();             }\r
651                 \r
652         if (split > 1) {\r
653             //create new common headers for each file with the correct number of reads\r
654             adjustCommonHeader(header);\r
655             \r
656             if (hasGroup) { delete groupMap; }\r
657             \r
658             //cout << "here" << endl;\r
659                         map<string, string>::iterator it;\r
660                         set<string> namesToRemove;\r
661                         for(int i=0;i<filehandles.size();i++){\r
662                                 for(int j=0;j<filehandles[0].size();j++){\r
663                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << endl;\r
664                                         if (filehandles[i][j] != "") {\r
665                                                 if (namesToRemove.count(filehandles[i][j]) == 0) {\r
666                                                         if(m->isBlank(filehandles[i][j])){\r
667                                 //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
668                                                                 m->mothurRemove(filehandles[i][j]);\r
669                                 m->mothurRemove(filehandlesHeaders[i][j]);\r
670                                                                 namesToRemove.insert(filehandles[i][j]);\r
671                             }\r
672                                                 }\r
673                                         }\r
674                                 }\r
675                         }\r
676             //cout << "here2" << endl;\r
677             //append new header to reads\r
678             for (int i = 0; i < filehandles.size(); i++) {\r
679                 for (int j = 0; j < filehandles[i].size(); j++) {\r
680                     m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
681                     m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
682                     m->mothurRemove(filehandlesHeaders[i][j]);\r
683                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
684                     if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
685                 }\r
686             }\r
687                         //cout << "here3" << endl;\r
688                         //remove names for outputFileNames, just cleans up the output\r
689                         for(int i = 0; i < outputNames.size(); i++) { \r
690                 if (namesToRemove.count(outputNames[i]) != 0) {\r
691                     //cout << "erasing " << i << '\t' << outputNames[i] << endl;\r
692                     outputNames.erase(outputNames.begin()+i);\r
693                     i--;\r
694                 } \r
695             }\r
696             //cout << "here4" << endl;\r
697             if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }\r
698             else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
699         }\r
700         \r
701                 return count;\r
702         }\r
703         catch(exception& e) {\r
704                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");\r
705                 exit(1);\r
706         }\r
707 }\r
708 //**********************************************************************************************************************\r
709 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){\r
710         try {\r
711         \r
712                 if (!in.eof()) {\r
713 \r
714                         //read magic number\r
715                         char buffer[4];\r
716                         in.read(buffer, 4);\r
717                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));\r
718             \r
719                         //read version\r
720                         char buffer9[4];\r
721                         in.read(buffer9, 4);\r
722                         header.version = "";\r
723                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }\r
724     \r
725                         //read offset\r
726                         char buffer2 [8];\r
727                         in.read(buffer2, 8);\r
728                         header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));\r
729                         \r
730                         //read index length\r
731                         char buffer3 [4];\r
732                         in.read(buffer3, 4);\r
733                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));\r
734             \r
735                         //read num reads\r
736                         char buffer4 [4];\r
737                         in.read(buffer4, 4);\r
738                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));\r
739             \r
740             if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }\r
741                                 \r
742                         //read header length\r
743                         char buffer5 [2];\r
744                         in.read(buffer5, 2);\r
745                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));\r
746                                         \r
747                         //read key length\r
748                         char buffer6 [2];\r
749                         in.read(buffer6, 2);\r
750                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));\r
751                         \r
752                         //read number of flow reads\r
753                         char buffer7 [2];\r
754                         in.read(buffer7, 2);\r
755                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));\r
756                                 \r
757                         //read format code\r
758                         char buffer8 [1];\r
759                         in.read(buffer8, 1);\r
760                         header.flogramFormatCode = (int)(buffer8[0]);\r
761                         \r
762                         //read flow chars\r
763                         char* tempBuffer = new char[header.numFlowsPerRead];\r
764                         in.read(&(*tempBuffer), header.numFlowsPerRead); \r
765                         header.flowChars = tempBuffer;\r
766                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }\r
767                         delete[] tempBuffer;\r
768                         \r
769                         //read key\r
770                         char* tempBuffer2 = new char[header.keyLength];\r
771                         in.read(&(*tempBuffer2), header.keyLength);\r
772                         header.keySequence = tempBuffer2;\r
773                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }\r
774                         delete[] tempBuffer2;\r
775                         \r
776                         /* Pad to 8 chars */\r
777                         unsigned long long spotInFile = in.tellg();\r
778                         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
779                         in.seekg(spot);\r
780             \r
781         }else{\r
782                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();\r
783                 }\r
784         \r
785                 return 0;\r
786         \r
787         }\r
788         catch(exception& e) {\r
789                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");\r
790                 exit(1);\r
791         }\r
792 }\r
793 //**********************************************************************************************************************\r
794 int SffInfoCommand::adjustCommonHeader(CommonHeader header){\r
795         try {\r
796         string endian = m->findEdianness();\r
797         char* mybuffer = new char[4];\r
798         ifstream in;\r
799         m->openInputFileBinary(currentFileName, in);\r
800         \r
801         ofstream outNoMatchHeader;\r
802         string tempNoHeader = "tempNoMatchHeader";\r
803         m->openOutputFileBinary(tempNoHeader, outNoMatchHeader);\r
804         \r
805         //magic number\r
806         in.read(mybuffer,4);\r
807         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
808             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
809                 ofstream out;\r
810                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
811                 out.write(mybuffer, in.gcount());\r
812                 out.close();\r
813             }\r
814         }\r
815         outNoMatchHeader.write(mybuffer, in.gcount());\r
816         delete[] mybuffer;\r
817         \r
818         //version\r
819         mybuffer = new char[4];\r
820         in.read(mybuffer,4);\r
821         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
822             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
823                 ofstream out;\r
824                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
825                 out.write(mybuffer, in.gcount());\r
826                 out.close();\r
827             }\r
828         }\r
829         outNoMatchHeader.write(mybuffer, in.gcount());\r
830         delete[] mybuffer;\r
831         \r
832         //offset\r
833         mybuffer = new char[8];\r
834         in.read(mybuffer,8);\r
835         unsigned long long offset = 0;\r
836         char* thisbuffer = new char[8];\r
837         thisbuffer[0] = (offset >> 56) & 0xFF;\r
838         thisbuffer[1] = (offset >> 48) & 0xFF;\r
839         thisbuffer[2] = (offset >> 40) & 0xFF;\r
840         thisbuffer[3] = (offset >> 32) & 0xFF;\r
841         thisbuffer[4] = (offset >> 24) & 0xFF;\r
842         thisbuffer[5] = (offset >> 16) & 0xFF;\r
843         thisbuffer[6] = (offset >> 8) & 0xFF;\r
844         thisbuffer[7] = offset & 0xFF;\r
845         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
846             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
847                 ofstream out;\r
848                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
849                 out.write(thisbuffer, 8);\r
850                 out.close();\r
851             }\r
852         }\r
853         outNoMatchHeader.write(thisbuffer, 8);\r
854         delete[] thisbuffer;\r
855         delete[] mybuffer;\r
856             \r
857                         \r
858         //read index length\r
859                 mybuffer = new char[4];\r
860         in.read(mybuffer,4);\r
861         offset = 0;\r
862         char* thisbuffer2 = new char[4];\r
863         thisbuffer2[0] = (offset >> 24) & 0xFF;\r
864         thisbuffer2[1] = (offset >> 16) & 0xFF;\r
865         thisbuffer2[2] = (offset >> 8) & 0xFF;\r
866         thisbuffer2[3] = offset & 0xFF;\r
867         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
868             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
869                 ofstream out;\r
870                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
871                 out.write(thisbuffer2, 4);\r
872                 out.close();\r
873             }\r
874         }\r
875         outNoMatchHeader.write(thisbuffer2, 4);\r
876         delete[] thisbuffer2;\r
877         delete[] mybuffer;\r
878                 \r
879         //change num reads\r
880         mybuffer = new char[4];\r
881         in.read(mybuffer,4);\r
882         delete[] mybuffer;\r
883         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
884             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
885                 char* thisbuffer = new char[4];\r
886                 if (endian == "BIG_ENDIAN") {\r
887                     thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;\r
888                     thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;\r
889                     thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;\r
890                     thisbuffer[3] = numSplitReads[i][j] & 0xFF;\r
891                 }else {\r
892                     thisbuffer[0] = numSplitReads[i][j] & 0xFF;\r
893                     thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF;\r
894                     thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
895                     thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
896                  }\r
897                 ofstream out;\r
898                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
899                 out.write(thisbuffer, 4);\r
900                 out.close();\r
901                 delete[] thisbuffer;\r
902             }\r
903         }\r
904         char* thisbuffer3 = new char[4];\r
905         if (endian == "BIG_ENDIAN") {\r
906             thisbuffer3[0] = (numNoMatch >> 24) & 0xFF;\r
907             thisbuffer3[1] = (numNoMatch >> 16) & 0xFF;\r
908             thisbuffer3[2] = (numNoMatch >> 8) & 0xFF;\r
909             thisbuffer3[3] = numNoMatch & 0xFF;\r
910         }else {\r
911             thisbuffer3[0] = numNoMatch & 0xFF;\r
912             thisbuffer3[1] = (numNoMatch >> 8) & 0xFF;\r
913             thisbuffer3[2] = (numNoMatch >> 16) & 0xFF;\r
914             thisbuffer3[3] = (numNoMatch >> 24) & 0xFF;\r
915         }\r
916         outNoMatchHeader.write(thisbuffer3, 4);\r
917         delete[] thisbuffer3;\r
918         \r
919         \r
920         //read header length\r
921         mybuffer = new char[2];\r
922         in.read(mybuffer,2);\r
923         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
924             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
925                 ofstream out;\r
926                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
927                 out.write(mybuffer, in.gcount());\r
928                 out.close();\r
929             }\r
930         }\r
931         outNoMatchHeader.write(mybuffer, in.gcount());\r
932         delete[] mybuffer;\r
933             \r
934         //read key length\r
935         mybuffer = new char[2];\r
936         in.read(mybuffer,2);\r
937         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
938             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
939                 ofstream out;\r
940                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
941                 out.write(mybuffer, in.gcount());\r
942                 out.close();\r
943             }\r
944         }\r
945         outNoMatchHeader.write(mybuffer, in.gcount());\r
946         delete[] mybuffer;\r
947                         \r
948         //read number of flow reads\r
949         mybuffer = new char[2];\r
950         in.read(mybuffer,2);\r
951         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
952             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
953                 ofstream out;\r
954                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
955                 out.write(mybuffer, in.gcount());\r
956                 out.close();\r
957             }\r
958         }\r
959         outNoMatchHeader.write(mybuffer, in.gcount());\r
960         delete[] mybuffer;\r
961             \r
962         //read format code\r
963         mybuffer = new char[1];\r
964         in.read(mybuffer,1);\r
965         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
966             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
967                 ofstream out;\r
968                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
969                 out.write(mybuffer, in.gcount());\r
970                 out.close();\r
971             }\r
972         }\r
973         outNoMatchHeader.write(mybuffer, in.gcount());\r
974         delete[] mybuffer;\r
975                         \r
976         //read flow chars\r
977         mybuffer = new char[header.numFlowsPerRead];\r
978         in.read(mybuffer,header.numFlowsPerRead);\r
979         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
980             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
981                 ofstream out;\r
982                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
983                 out.write(mybuffer, in.gcount());\r
984                 out.close();\r
985             }\r
986         }\r
987         outNoMatchHeader.write(mybuffer, in.gcount());\r
988         delete[] mybuffer;\r
989                         \r
990         //read key\r
991         mybuffer = new char[header.keyLength];\r
992         in.read(mybuffer,header.keyLength);\r
993         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
994             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
995                 ofstream out;\r
996                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
997                 out.write(mybuffer, in.gcount());\r
998                 out.close();\r
999             }\r
1000         }\r
1001         outNoMatchHeader.write(mybuffer, in.gcount());\r
1002         delete[] mybuffer;\r
1003         \r
1004                         \r
1005         /* Pad to 8 chars */\r
1006         unsigned long long spotInFile = in.tellg();\r
1007         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
1008         in.seekg(spot);\r
1009         \r
1010         mybuffer = new char[spot-spotInFile];\r
1011         for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
1012             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
1013                 ofstream out;\r
1014                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
1015                 out.write(mybuffer, spot-spotInFile);\r
1016                 out.close();\r
1017             }\r
1018         }\r
1019         outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
1020         outNoMatchHeader.close();\r
1021         delete[] mybuffer;\r
1022         in.close();\r
1023         \r
1024         m->appendBinaryFiles(noMatchFile, tempNoHeader);\r
1025         m->renameFile(tempNoHeader, noMatchFile);\r
1026         m->mothurRemove(tempNoHeader);\r
1027         \r
1028                 return 0;\r
1029         \r
1030         }\r
1031         catch(exception& e) {\r
1032                 m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");\r
1033                 exit(1);\r
1034         }\r
1035 }\r
1036 //**********************************************************************************************************************\r
1037 bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
1038         try {\r
1039         unsigned long long startSpotInFile = in.tellg();\r
1040                 if (!in.eof()) {\r
1041             \r
1042             /*****************************************/\r
1043             //read header\r
1044             \r
1045             //read header length\r
1046                         char buffer [2];\r
1047                         in.read(buffer, 2); \r
1048                         header.headerLength = be_int2(*(unsigned short *)(&buffer));\r
1049             \r
1050                         //read name length\r
1051                         char buffer2 [2];\r
1052                         in.read(buffer2, 2);\r
1053                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));\r
1054             \r
1055                         //read num bases\r
1056                         char buffer3 [4];\r
1057                         in.read(buffer3, 4);\r
1058                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));\r
1059             \r
1060                         \r
1061                         //read clip qual left\r
1062                         char buffer4 [2];\r
1063                         in.read(buffer4, 2);\r
1064                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));\r
1065                         header.clipQualLeft = 5;\r
1066             \r
1067                         \r
1068                         //read clip qual right\r
1069                         char buffer5 [2];\r
1070                         in.read(buffer5, 2);\r
1071                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));\r
1072            \r
1073             \r
1074                         //read clipAdapterLeft\r
1075                         char buffer6 [2];\r
1076                         in.read(buffer6, 2);\r
1077                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));\r
1078             \r
1079             \r
1080                         //read clipAdapterRight\r
1081                         char buffer7 [2];\r
1082                         in.read(buffer7, 2);\r
1083                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));\r
1084             \r
1085             \r
1086                         //read name\r
1087                         char* tempBuffer = new char[header.nameLength];\r
1088                         in.read(&(*tempBuffer), header.nameLength);\r
1089                         header.name = tempBuffer;\r
1090                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }\r
1091             \r
1092                         delete[] tempBuffer;\r
1093                         \r
1094                         //extract info from name\r
1095                         decodeName(header.timestamp, header.region, header.xy, header.name);\r
1096                         \r
1097                         /* Pad to 8 chars */\r
1098                         unsigned long long spotInFile = in.tellg();\r
1099                         unsigned long long spot = (spotInFile + 7)& ~7;\r
1100                         in.seekg(spot);\r
1101 \r
1102             /*****************************************/\r
1103             //sequence read \r
1104             \r
1105                         //read flowgram\r
1106                         read.flowgram.resize(numFlowReads);\r
1107                         for (int i = 0; i < numFlowReads; i++) {  \r
1108                                 char buffer [2];\r
1109                                 in.read(buffer, 2);\r
1110                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));\r
1111                         }\r
1112             \r
1113                         //read flowIndex\r
1114                         read.flowIndex.resize(header.numBases);\r
1115                         for (int i = 0; i < header.numBases; i++) {  \r
1116                                 char temp[1];\r
1117                                 in.read(temp, 1);\r
1118                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));\r
1119                         }\r
1120         \r
1121                         //read bases\r
1122                         char* tempBuffer6 = new char[header.numBases];\r
1123                         in.read(&(*tempBuffer6), header.numBases);\r
1124                         read.bases = tempBuffer6;\r
1125                         if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }\r
1126                         delete[] tempBuffer6;\r
1127 \r
1128                         //read qual scores\r
1129                         read.qualScores.resize(header.numBases);\r
1130                         for (int i = 0; i < header.numBases; i++) {  \r
1131                                 char temp[1];\r
1132                                 in.read(temp, 1);\r
1133                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));\r
1134                         }\r
1135         \r
1136                         /* Pad to 8 chars */\r
1137                         spotInFile = in.tellg();\r
1138                         spot = (spotInFile + 7)& ~7;\r
1139                         in.seekg(spot);\r
1140             \r
1141             if (split > 1) { \r
1142                \r
1143                 int barcodeIndex, primerIndex, trashCodeLength;\r
1144                 \r
1145                 if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);                }\r
1146                 else if (hasGroup)  {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode");   }\r
1147                 else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
1148 \r
1149                 char * mybuffer;\r
1150                 mybuffer = new char [spot-startSpotInFile];\r
1151                 \r
1152                 ifstream in2;\r
1153                 m->openInputFileBinary(currentFileName, in2);\r
1154                 in2.seekg(startSpotInFile);\r
1155                 in2.read(mybuffer,spot-startSpotInFile);\r
1156                 \r
1157                 \r
1158                 if(trashCodeLength == 0){\r
1159                     ofstream out;\r
1160                     m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
1161                     out.write(mybuffer, in2.gcount());\r
1162                     out.close();\r
1163                     numSplitReads[barcodeIndex][primerIndex]++;\r
1164                                 }\r
1165                                 else{\r
1166                                         ofstream out;\r
1167                     m->openOutputFileBinaryAppend(noMatchFile, out);\r
1168                     out.write(mybuffer, in2.gcount());\r
1169                     out.close();\r
1170                     numNoMatch++;\r
1171                                 }\r
1172                                 delete[] mybuffer;\r
1173                 in2.close();\r
1174         }    \r
1175             \r
1176                 }else{\r
1177                         m->mothurOut("Error reading."); m->mothurOutEndLine();\r
1178                 }\r
1179         \r
1180         if (in.eof()) {  return true; }\r
1181         \r
1182                 return false;\r
1183         }\r
1184         catch(exception& e) {\r
1185                 m->errorOut(e, "SffInfoCommand", "readSeqData");\r
1186                 exit(1);\r
1187         }\r
1188 }\r
1189 //**********************************************************************************************************************\r
1190 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
1191         try {\r
1192         //find group read belongs to\r
1193         TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
1194         \r
1195         int success = 1;\r
1196         string trashCode = "";\r
1197         int currentSeqsDiffs = 0;\r
1198         \r
1199         string seq = read.bases;\r
1200         \r
1201         if (trim) {\r
1202             if(header.clipQualRight < header.clipQualLeft){\r
1203                 if (header.clipQualRight == 0) { //don't trim right\r
1204                     seq = seq.substr(header.clipQualLeft-1);\r
1205                 }else {\r
1206                     seq = "NNNN";\r
1207                 }\r
1208             }\r
1209             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1210                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
1211             }\r
1212             else {\r
1213                 seq = seq.substr(header.clipQualLeft-1);\r
1214             }\r
1215         }else{\r
1216             //if you wanted the sfftxt then you already converted the bases to the right case\r
1217             if (!sfftxt) {\r
1218                 int endValue = header.clipQualRight;\r
1219                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1220                 if(endValue == 0){      endValue = seq.length();        }\r
1221                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
1222                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
1223                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
1224             }\r
1225         }\r
1226         \r
1227         Sequence currSeq(header.name, seq);\r
1228         QualityScores currQual;\r
1229         \r
1230         if(numLinkers != 0){\r
1231             success = trimOligos.stripLinker(currSeq, currQual);\r
1232             if(success > ldiffs)                {       trashCode += 'k';       }\r
1233             else{ currentSeqsDiffs += success;  }\r
1234             \r
1235         }\r
1236         \r
1237         if(barcodes.size() != 0){\r
1238             success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
1239             if(success > bdiffs)                {       trashCode += 'b';       }\r
1240             else{ currentSeqsDiffs += success;  }\r
1241         }\r
1242         \r
1243         if(numSpacers != 0){\r
1244             success = trimOligos.stripSpacer(currSeq, currQual);\r
1245             if(success > sdiffs)                {       trashCode += 's';       }\r
1246             else{ currentSeqsDiffs += success;  }\r
1247             \r
1248         }\r
1249         \r
1250         if(numFPrimers != 0){\r
1251             success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
1252             if(success > pdiffs)                {       trashCode += 'f';       }\r
1253             else{ currentSeqsDiffs += success;  }\r
1254         }\r
1255         \r
1256         if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }\r
1257         \r
1258         if(revPrimer.size() != 0){\r
1259             success = trimOligos.stripReverse(currSeq, currQual);\r
1260             if(!success)                                {       trashCode += 'r';       }\r
1261         }\r
1262 \r
1263         \r
1264         return trashCode.length();\r
1265     }\r
1266         catch(exception& e) {\r
1267                 m->errorOut(e, "SffInfoCommand", "findGroup");\r
1268                 exit(1);\r
1269         }\r
1270 }\r
1271 //**********************************************************************************************************************\r
1272 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) {\r
1273         try {\r
1274         string trashCode = "";\r
1275         primer = 0;\r
1276         \r
1277         string group = groupMap->getGroup(header.name);\r
1278         if (group == "not found") {     trashCode += "g";   } //scrap for group\r
1279         else { //find file group\r
1280             map<string, int>::iterator it = barcodes.find(group);\r
1281             if (it != barcodes.end()) {\r
1282                 barcode = it->second;\r
1283             }else { trashCode += "g"; }\r
1284         }\r
1285         \r
1286         return trashCode.length();\r
1287     }\r
1288         catch(exception& e) {\r
1289                 m->errorOut(e, "SffInfoCommand", "findGroup");\r
1290                 exit(1);\r
1291         }\r
1292 }\r
1293 //**********************************************************************************************************************\r
1294 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
1295         try {\r
1296                 \r
1297                 if (name.length() >= 6) {\r
1298                         string time = name.substr(0, 6);\r
1299                         unsigned int timeNum = m->fromBase36(time);\r
1300                         \r
1301                         int q1 = timeNum / 60;\r
1302                         int sec = timeNum - 60 * q1;\r
1303                         int q2 = q1 / 60;\r
1304                         int minute = q1 - 60 * q2;\r
1305                         int q3 = q2 / 24;\r
1306                         int hr = q2 - 24 * q3;\r
1307                         int q4 = q3 / 32;\r
1308                         int day = q3 - 32 * q4;\r
1309                         int q5 = q4 / 13;\r
1310                         int mon = q4 - 13 * q5;\r
1311                         int year = 2000 + q5;\r
1312                 \r
1313                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);\r
1314                 }\r
1315                 \r
1316                 if (name.length() >= 9) {\r
1317                         region = name.substr(7, 2);\r
1318                 \r
1319                         string xyNum = name.substr(9);\r
1320                         unsigned int myXy = m->fromBase36(xyNum);\r
1321                         int x = myXy >> 12;\r
1322                         int y = myXy & 4095;\r
1323                 \r
1324                         xy = toString(x) + "_" + toString(y);\r
1325                 }\r
1326                 \r
1327                 return 0;\r
1328         }\r
1329         catch(exception& e) {\r
1330                 m->errorOut(e, "SffInfoCommand", "decodeName");\r
1331                 exit(1);\r
1332         }\r
1333 }\r
1334 //**********************************************************************************************************************\r
1335 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {\r
1336         try {\r
1337         \r
1338                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;\r
1339                 out << "Version: " << header.version << endl;\r
1340                 out << "Index Offset: " << header.indexOffset << endl;\r
1341                 out << "Index Length: " << header.indexLength << endl;\r
1342                 out << "Number of Reads: " << header.numReads << endl;\r
1343                 out << "Header Length: " << header.headerLength << endl;\r
1344                 out << "Key Length: " << header.keyLength << endl;\r
1345                 out << "Number of Flows: " << header.numFlowsPerRead << endl;\r
1346                 out << "Format Code: " << header.flogramFormatCode << endl;\r
1347                 out << "Flow Chars: " << header.flowChars << endl;\r
1348                 out << "Key Sequence: " << header.keySequence << endl << endl;\r
1349                         \r
1350                 return 0;\r
1351         }\r
1352         catch(exception& e) {\r
1353                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");\r
1354                 exit(1);\r
1355         }\r
1356 }\r
1357 //**********************************************************************************************************************\r
1358 int SffInfoCommand::printHeader(ofstream& out, Header& header) {\r
1359         try {\r
1360                 \r
1361                 out << ">" << header.name << endl;\r
1362                 out << "Run Prefix: " << header.timestamp << endl;\r
1363                 out << "Region #:  " << header.region << endl;\r
1364                 out << "XY Location: " << header.xy << endl << endl;\r
1365                 \r
1366                 out << "Run Name:  " << endl;\r
1367                 out << "Analysis Name:  " << endl;\r
1368                 out << "Full Path: " << endl << endl;\r
1369                 \r
1370                 out << "Read Header Len: " << header.headerLength << endl;\r
1371                 out << "Name Length: " << header.nameLength << endl;\r
1372                 out << "# of Bases: " << header.numBases << endl;\r
1373                 out << "Clip Qual Left: " << header.clipQualLeft << endl;\r
1374                 out << "Clip Qual Right: " << header.clipQualRight << endl;\r
1375                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;\r
1376                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;\r
1377                 \r
1378                 return 0;\r
1379         }\r
1380         catch(exception& e) {\r
1381                 m->errorOut(e, "SffInfoCommand", "printHeader");\r
1382                 exit(1);\r
1383         }\r
1384 }\r
1385 //**********************************************************************************************************************\r
1386 bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {\r
1387         try {\r
1388         bool okay = true;\r
1389         string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";\r
1390         \r
1391         if (header.clipQualLeft > read.bases.length()) {\r
1392             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
1393         }\r
1394         if (header.clipQualRight > read.bases.length()) {\r
1395             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
1396         }\r
1397         if (header.clipQualLeft > read.qualScores.size()) {\r
1398             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
1399         }\r
1400         if (header.clipQualRight > read.qualScores.size()) {\r
1401             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
1402         }\r
1403         \r
1404         if (okay == false) {\r
1405             m->mothurOut(message); m->mothurOutEndLine();\r
1406         }\r
1407         \r
1408                 return okay;\r
1409         }\r
1410         catch(exception& e) {\r
1411                 m->errorOut(e, "SffInfoCommand", "sanityCheck");\r
1412                 exit(1);\r
1413         }\r
1414 }\r
1415 //**********************************************************************************************************************\r
1416 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {\r
1417         try {\r
1418                 out << "Flowgram: ";\r
1419                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }\r
1420                 \r
1421                 out << endl <<  "Flow Indexes: ";\r
1422                 int sum = 0;\r
1423                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }\r
1424                 \r
1425                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1426         int endValue = header.clipQualRight;\r
1427                 if(endValue == 0){      endValue = read.bases.length(); }\r
1428                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }\r
1429                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }\r
1430                 for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }\r
1431                 \r
1432                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";\r
1433                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
1434         \r
1435                 \r
1436                 out << endl << endl;\r
1437                 \r
1438                 return 0;\r
1439         }\r
1440         catch(exception& e) {\r
1441                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");\r
1442                 exit(1);\r
1443         }\r
1444 }\r
1445 //**********************************************************************************************************************\r
1446 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {\r
1447         try {\r
1448                 string seq = read.bases;\r
1449                 \r
1450         if (trim) {\r
1451                         if(header.clipQualRight < header.clipQualLeft){\r
1452                                 if (header.clipQualRight == 0) { //don't trim right\r
1453                     seq = seq.substr(header.clipQualLeft-1);\r
1454                 }else {\r
1455                     seq = "NNNN";\r
1456                 }\r
1457                         }\r
1458                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1459                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
1460                         }\r
1461                         else {\r
1462                                 seq = seq.substr(header.clipQualLeft-1);\r
1463                         }\r
1464                 }else{\r
1465                         //if you wanted the sfftxt then you already converted the bases to the right case\r
1466                         if (!sfftxt) {\r
1467                 int endValue = header.clipQualRight;\r
1468                                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1469                                 if(endValue == 0){      endValue = seq.length();        }\r
1470                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
1471                                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
1472                                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
1473                         }\r
1474                 }\r
1475                 \r
1476                 out << ">" << header.name  << " xy=" << header.xy << endl;\r
1477                 out << seq << endl;\r
1478                 \r
1479                 return 0;\r
1480         }\r
1481         catch(exception& e) {\r
1482                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");\r
1483                 exit(1);\r
1484         }\r
1485 }\r
1486 \r
1487 //**********************************************************************************************************************\r
1488 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {\r
1489         try {\r
1490                 \r
1491                 if (trim) {\r
1492                         if(header.clipQualRight < header.clipQualLeft){\r
1493                 if (header.clipQualRight == 0) { //don't trim right\r
1494                     out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;\r
1495                     for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';       }       \r
1496                 }else {\r
1497                     out << ">" << header.name << " xy=" << header.xy << endl;\r
1498                     out << "0\t0\t0\t0";\r
1499                 }\r
1500                         }\r
1501                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1502                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
1503                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }\r
1504                         }\r
1505                         else{\r
1506                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
1507                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       \r
1508                         }\r
1509                 }else{\r
1510                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;\r
1511                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
1512                 }\r
1513                 \r
1514                 out << endl;\r
1515                 \r
1516                 return 0;\r
1517         }\r
1518         catch(exception& e) {\r
1519                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");\r
1520                 exit(1);\r
1521         }\r
1522 }\r
1523 \r
1524 //**********************************************************************************************************************\r
1525 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {\r
1526         try {\r
1527         \r
1528         int endValue = header.clipQualRight;\r
1529         if (header.clipQualRight == 0) {\r
1530             endValue = read.flowIndex.size();\r
1531             if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }\r
1532         }\r
1533         if(endValue > header.clipQualLeft){\r
1534             \r
1535             int rightIndex = 0;\r
1536             for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];      }\r
1537             \r
1538             out << header.name << ' ' << rightIndex;\r
1539             for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }\r
1540             out << endl;\r
1541         }\r
1542                 \r
1543                 \r
1544                 return 0;\r
1545         }\r
1546         catch(exception& e) {\r
1547                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");\r
1548                 exit(1);\r
1549         }\r
1550 }\r
1551 //**********************************************************************************************************************\r
1552 int SffInfoCommand::readAccnosFile(string filename) {\r
1553         try {\r
1554                 //remove old names\r
1555                 seqNames.clear();\r
1556                 \r
1557                 ifstream in;\r
1558                 m->openInputFile(filename, in);\r
1559                 string name;\r
1560                 \r
1561                 while(!in.eof()){\r
1562                         in >> name; m->gobble(in);\r
1563                                                 \r
1564                         seqNames.insert(name);\r
1565                         \r
1566                         if (m->control_pressed) { seqNames.clear(); break; }\r
1567                 }\r
1568                 in.close();             \r
1569                 \r
1570                 return 0;\r
1571         }\r
1572         catch(exception& e) {\r
1573                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");\r
1574                 exit(1);\r
1575         }\r
1576 }\r
1577 //**********************************************************************************************************************\r
1578 int SffInfoCommand::parseSffTxt() {\r
1579         try {\r
1580                 \r
1581                 ifstream inSFF;\r
1582                 m->openInputFile(sfftxtFilename, inSFF);\r
1583                 \r
1584                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }\r
1585                 \r
1586                 //output file names\r
1587                 ofstream outFasta, outQual, outFlow;\r
1588                 string outFastaFileName, outQualFileName;\r
1589                 string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));\r
1590                 if (fileRoot.length() > 0) {\r
1591                         //rip off last .\r
1592                         fileRoot = fileRoot.substr(0, fileRoot.length()-1);\r
1593                         fileRoot = m->getRootName(fileRoot);\r
1594                 }\r
1595                 \r
1596         map<string, string> variables; \r
1597                 variables["[filename]"] = fileRoot;\r
1598                 string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
1599                 string outFlowFileName = getOutputFileName("flow",variables);\r
1600                 if (!trim) { variables["[tag]"] = "raw"; }\r
1601                 outFastaFileName = getOutputFileName("fasta",variables);\r
1602         outQualFileName = getOutputFileName("qfile",variables);\r
1603                 \r
1604                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
1605                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
1606                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
1607                 \r
1608                 //read common header\r
1609                 string commonHeader = m->getline(inSFF);\r
1610                 string magicNumber = m->getline(inSFF); \r
1611                 string version = m->getline(inSFF);\r
1612                 string indexOffset = m->getline(inSFF);\r
1613                 string indexLength = m->getline(inSFF);\r
1614                 int numReads = parseHeaderLineToInt(inSFF);\r
1615                 string headerLength = m->getline(inSFF);\r
1616                 string keyLength = m->getline(inSFF);\r
1617                 int numFlows = parseHeaderLineToInt(inSFF);\r
1618                 string flowgramCode = m->getline(inSFF);\r
1619                 string flowChars = m->getline(inSFF);\r
1620                 string keySequence = m->getline(inSFF);\r
1621                 m->gobble(inSFF);\r
1622                 \r
1623                 string seqName;\r
1624                 \r
1625                 if (flow)       {       outFlow << numFlows << endl;    }\r
1626                 \r
1627                 for(int i=0;i<numReads;i++){\r
1628                         \r
1629                         //sanity check\r
1630                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }\r
1631                         \r
1632                         Header header;\r
1633                         \r
1634                         //parse read header\r
1635                         inSFF >> seqName;\r
1636                         seqName = seqName.substr(1);\r
1637                         m->gobble(inSFF);\r
1638                         header.name = seqName;\r
1639                         \r
1640                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;\r
1641                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;\r
1642                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;\r
1643                         m->gobble(inSFF);\r
1644                                 \r
1645                         string runName = parseHeaderLineToString(inSFF);\r
1646                         string analysisName = parseHeaderLineToString(inSFF);\r
1647                         string fullPath = parseHeaderLineToString(inSFF);\r
1648                         m->gobble(inSFF);\r
1649                         \r
1650                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);\r
1651                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);\r
1652                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;\r
1653                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);\r
1654                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;\r
1655                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);\r
1656                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);\r
1657                         m->gobble(inSFF);\r
1658                                 \r
1659                         seqRead read;\r
1660                         \r
1661                         //parse read\r
1662                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;\r
1663                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); \r
1664                         \r
1665                         //adjust for print\r
1666                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);\r
1667                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }\r
1668                         read.flowIndex = flowIndicesAdjusted;\r
1669                         \r
1670                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;\r
1671                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;\r
1672                         m->gobble(inSFF);\r
1673                                         \r
1674                         //if you have provided an accosfile and this seq is not in it, then dont print\r
1675                         bool print = true;\r
1676                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }\r
1677                         \r
1678                         //print \r
1679                         if (print) {\r
1680                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }\r
1681                                 if (qual)       {       printQualSeqData(outQual, read, header);        }\r
1682                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }\r
1683                         }\r
1684                         \r
1685                         //report progress\r
1686                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }\r
1687                         \r
1688                         if (m->control_pressed) {  break;  }\r
1689                 }\r
1690                 \r
1691                 //report progress\r
1692                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }\r
1693                 \r
1694                 inSFF.close();\r
1695                 \r
1696                 if (fasta)      {  outFasta.close();    }\r
1697                 if (qual)       {  outQual.close();             }\r
1698                 if (flow)       {  outFlow.close();             }\r
1699                 \r
1700                 return 0;\r
1701         }\r
1702         catch(exception& e) {\r
1703                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");\r
1704                 exit(1);\r
1705         }\r
1706 }\r
1707 //**********************************************************************************************************************\r
1708 \r
1709 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){\r
1710         try {\r
1711                 int number;\r
1712                 \r
1713                 while (!file.eof())     {\r
1714                         \r
1715                         char c = file.get(); \r
1716                         if (c == ':'){\r
1717                                 file >> number;\r
1718                                 break;\r
1719                         }\r
1720                         \r
1721                 }\r
1722                 m->gobble(file);\r
1723                 return number;\r
1724         }\r
1725         catch(exception& e) {\r
1726                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");\r
1727                 exit(1);\r
1728         }\r
1729         \r
1730 }\r
1731 \r
1732 //**********************************************************************************************************************\r
1733 \r
1734 string SffInfoCommand::parseHeaderLineToString(ifstream& file){\r
1735         try {\r
1736                 string text;\r
1737                 \r
1738                 while (!file.eof())     {\r
1739                         char c = file.get(); \r
1740                         \r
1741                         if (c == ':'){\r
1742                                 //m->gobble(file);\r
1743                                 //text = m->getline(file);      \r
1744                                 file >> text;\r
1745                                 break;\r
1746                         }\r
1747                 }\r
1748                 m->gobble(file);\r
1749                 \r
1750                 return text;\r
1751         }\r
1752         catch(exception& e) {\r
1753                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");\r
1754                 exit(1);\r
1755         }\r
1756 }\r
1757 \r
1758 //**********************************************************************************************************************\r
1759 \r
1760 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){\r
1761         try {\r
1762                 vector<unsigned short> floatVector(length);\r
1763                 \r
1764                 while (!file.eof())     {\r
1765                         char c = file.get(); \r
1766                         if (c == ':'){\r
1767                                 float temp;\r
1768                                 for(int i=0;i<length;i++){\r
1769                                         file >> temp;\r
1770                                         floatVector[i] = temp * 100;\r
1771                                 }\r
1772                                 break;\r
1773                         }\r
1774                 }\r
1775                 m->gobble(file);        \r
1776                 return floatVector;\r
1777         }\r
1778         catch(exception& e) {\r
1779                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");\r
1780                 exit(1);\r
1781         }\r
1782 }\r
1783 \r
1784 //**********************************************************************************************************************\r
1785 \r
1786 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){\r
1787         try {\r
1788                 vector<unsigned int> intVector(length);\r
1789                 \r
1790                 while (!file.eof())     {\r
1791                         char c = file.get(); \r
1792                         if (c == ':'){\r
1793                                 for(int i=0;i<length;i++){\r
1794                                         file >> intVector[i];\r
1795                                 }\r
1796                                 break;\r
1797                         }\r
1798                 }\r
1799                 m->gobble(file);        \r
1800                 return intVector;\r
1801         }\r
1802         catch(exception& e) {\r
1803                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");\r
1804                 exit(1);\r
1805         }\r
1806 }\r
1807 //***************************************************************************************************************\r
1808 \r
1809 bool SffInfoCommand::readOligos(string oligoFile){\r
1810         try {\r
1811         filehandles.clear();\r
1812         numSplitReads.clear();\r
1813         filehandlesHeaders.clear();\r
1814         \r
1815                 ifstream inOligos;\r
1816                 m->openInputFile(oligoFile, inOligos);\r
1817                 \r
1818                 string type, oligo, group;\r
1819         \r
1820                 int indexPrimer = 0;\r
1821                 int indexBarcode = 0;\r
1822                 \r
1823                 while(!inOligos.eof()){\r
1824             \r
1825                         inOligos >> type;\r
1826             \r
1827                         if(type[0] == '#'){\r
1828                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there\r
1829                                 m->gobble(inOligos);\r
1830                         }\r
1831                         else{\r
1832                                 m->gobble(inOligos);\r
1833                                 //make type case insensitive\r
1834                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }\r
1835                                 \r
1836                                 inOligos >> oligo;\r
1837                                 \r
1838                                 for(int i=0;i<oligo.length();i++){\r
1839                                         oligo[i] = toupper(oligo[i]);\r
1840                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }\r
1841                                 }\r
1842                                 \r
1843                                 if(type == "FORWARD"){\r
1844                                         group = "";\r
1845                                         \r
1846                                         // get rest of line in case there is a primer name\r
1847                                         while (!inOligos.eof()) {\r
1848                                                 char c = inOligos.get();\r
1849                                                 if (c == 10 || c == 13 || c == -1){     break;  }\r
1850                                                 else if (c == 32 || c == 9){;} //space or tab\r
1851                                                 else {  group += c;  }\r
1852                                         }\r
1853                                         \r
1854                                         //check for repeat barcodes\r
1855                                         map<string, int>::iterator itPrime = primers.find(oligo);\r
1856                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
1857                                         \r
1858                                         primers[oligo]=indexPrimer; indexPrimer++;\r
1859                                         primerNameVector.push_back(group);\r
1860                    \r
1861                                 }else if(type == "REVERSE"){\r
1862                                         //Sequence oligoRC("reverse", oligo);\r
1863                                         //oligoRC.reverseComplement();\r
1864                     string oligoRC = reverseOligo(oligo);\r
1865                                         revPrimer.push_back(oligoRC);\r
1866                                 }\r
1867                                 else if(type == "BARCODE"){\r
1868                                         inOligos >> group;\r
1869                     \r
1870                                         \r
1871                                         //check for repeat barcodes\r
1872                                         map<string, int>::iterator itBar = barcodes.find(oligo);\r
1873                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
1874                     \r
1875                                         barcodes[oligo]=indexBarcode; indexBarcode++;\r
1876                                         barcodeNameVector.push_back(group);\r
1877                                 }else if(type == "LINKER"){\r
1878                                         linker.push_back(oligo);\r
1879                                 }else if(type == "SPACER"){\r
1880                                         spacer.push_back(oligo);\r
1881                                 }\r
1882                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
1883                         }\r
1884                         m->gobble(inOligos);\r
1885                 }\r
1886                 inOligos.close();\r
1887     \r
1888                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }\r
1889     \r
1890                 //add in potential combos\r
1891                 if(barcodeNameVector.size() == 0){\r
1892                         barcodes[""] = 0;\r
1893                         barcodeNameVector.push_back("");\r
1894                 }\r
1895                 \r
1896                 if(primerNameVector.size() == 0){\r
1897                         primers[""] = 0;\r
1898                         primerNameVector.push_back("");\r
1899                 }\r
1900                 \r
1901                 filehandles.resize(barcodeNameVector.size());\r
1902                 for(int i=0;i<filehandles.size();i++){\r
1903                         filehandles[i].assign(primerNameVector.size(), "");\r
1904                 }\r
1905         \r
1906                 if(split > 1){\r
1907                         set<string> uniqueNames; //used to cleanup outputFileNames\r
1908                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
1909                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
1910                                         \r
1911                                         string primerName = primerNameVector[itPrimer->second];\r
1912                                         string barcodeName = barcodeNameVector[itBar->second];\r
1913                                         \r
1914                                         string comboGroupName = "";\r
1915                                         string fastaFileName = "";\r
1916                                         string qualFileName = "";\r
1917                                         string nameFileName = "";\r
1918                                         \r
1919                                         if(primerName == ""){\r
1920                                                 comboGroupName = barcodeNameVector[itBar->second];\r
1921                                         }\r
1922                                         else{\r
1923                                                 if(barcodeName == ""){\r
1924                                                         comboGroupName = primerNameVector[itPrimer->second];\r
1925                                                 }\r
1926                                                 else{\r
1927                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
1928                                                 }\r
1929                                         }\r
1930                                         \r
1931                                         ofstream temp;\r
1932                     map<string, string> variables;\r
1933                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1934                     variables["[group]"] = comboGroupName;\r
1935                                         string thisFilename = getOutputFileName("sff",variables);\r
1936                                         if (uniqueNames.count(thisFilename) == 0) {\r
1937                                                 outputNames.push_back(thisFilename);\r
1938                                                 outputTypes["sff"].push_back(thisFilename);\r
1939                                                 uniqueNames.insert(thisFilename);\r
1940                                         }\r
1941                                         \r
1942                                         filehandles[itBar->second][itPrimer->second] = thisFilename;\r
1943                                         temp.open(thisFilename.c_str(), ios::binary);           temp.close();\r
1944                                 }\r
1945                         }\r
1946                 }\r
1947                 numFPrimers = primers.size();\r
1948         numLinkers = linker.size();\r
1949         numSpacers = spacer.size();\r
1950         map<string, string> variables;\r
1951         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1952         variables["[group]"] = "scrap";\r
1953                 noMatchFile = getOutputFileName("sff",variables);\r
1954         m->mothurRemove(noMatchFile);\r
1955         numNoMatch = 0;\r
1956         \r
1957         \r
1958                 bool allBlank = true;\r
1959                 for (int i = 0; i < barcodeNameVector.size(); i++) {\r
1960                         if (barcodeNameVector[i] != "") {\r
1961                                 allBlank = false;\r
1962                                 break;\r
1963                         }\r
1964                 }\r
1965                 for (int i = 0; i < primerNameVector.size(); i++) {\r
1966                         if (primerNameVector[i] != "") {\r
1967                                 allBlank = false;\r
1968                                 break;\r
1969                         }\r
1970                 }\r
1971                 \r
1972         filehandlesHeaders.resize(filehandles.size());\r
1973         numSplitReads.resize(filehandles.size());\r
1974         for (int i = 0; i < filehandles.size(); i++) {\r
1975             numSplitReads[i].resize(filehandles[i].size(), 0);\r
1976             for (int j = 0; j < filehandles[i].size(); j++) {\r
1977                 filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
1978             }\r
1979         }\r
1980         \r
1981                 if (allBlank) {\r
1982                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();\r
1983                         split = 1;\r
1984                         return false;\r
1985                 }\r
1986                 \r
1987                 return true;\r
1988                 \r
1989         }\r
1990         catch(exception& e) {\r
1991                 m->errorOut(e, "SffInfoCommand", "readOligos");\r
1992                 exit(1);\r
1993         }\r
1994 }\r
1995 //***************************************************************************************************************\r
1996 \r
1997 bool SffInfoCommand::readGroup(string oligoFile){\r
1998         try {\r
1999         filehandles.clear();\r
2000         numSplitReads.clear();\r
2001         filehandlesHeaders.clear();\r
2002         barcodes.clear();\r
2003         \r
2004         groupMap = new GroupMap();\r
2005         groupMap->readMap(oligoFile);\r
2006     \r
2007         //like barcodeNameVector - no primer names\r
2008         vector<string> groups = groupMap->getNamesOfGroups();\r
2009                 \r
2010                 filehandles.resize(groups.size());\r
2011         for (int i = 0; i < filehandles.size(); i++) {\r
2012             for (int j = 0; j < 1; j++) {\r
2013                 \r
2014                 map<string, string> variables;\r
2015                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
2016                 variables["[group]"] = groups[i];\r
2017                 string thisFilename = getOutputFileName("sff",variables);\r
2018                 outputNames.push_back(thisFilename);\r
2019                 outputTypes["sff"].push_back(thisFilename);\r
2020                \r
2021                 ofstream temp;\r
2022                 m->openOutputFileBinary(thisFilename, temp); temp.close();\r
2023                 filehandles[i].push_back(thisFilename);\r
2024                 barcodes[groups[i]] = i;\r
2025             }\r
2026         }\r
2027         \r
2028         map<string, string> variables;\r
2029         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
2030         variables["[group]"] = "scrap";\r
2031                 noMatchFile = getOutputFileName("sff",variables);\r
2032         m->mothurRemove(noMatchFile);\r
2033         numNoMatch = 0;\r
2034         \r
2035                 \r
2036         filehandlesHeaders.resize(groups.size());\r
2037         numSplitReads.resize(filehandles.size());\r
2038         for (int i = 0; i < filehandles.size(); i++) {\r
2039             numSplitReads[i].resize(filehandles[i].size(), 0);\r
2040             for (int j = 0; j < filehandles[i].size(); j++) {\r
2041                 ofstream temp ;\r
2042                 string thisHeader = filehandles[i][j]+"headers";\r
2043                 m->openOutputFileBinary(thisHeader, temp); temp.close();\r
2044                 filehandlesHeaders[i].push_back(thisHeader);\r
2045             }\r
2046         }\r
2047                 \r
2048                 return true;\r
2049                 \r
2050         }\r
2051         catch(exception& e) {\r
2052                 m->errorOut(e, "SffInfoCommand", "readGroup");\r
2053                 exit(1);\r
2054         }\r
2055 }\r
2056 \r
2057 //********************************************************************/\r
2058 string SffInfoCommand::reverseOligo(string oligo){\r
2059         try {\r
2060         string reverse = "";\r
2061         \r
2062         for(int i=oligo.length()-1;i>=0;i--){\r
2063             \r
2064             if(oligo[i] == 'A')         {       reverse += 'T'; }\r
2065             else if(oligo[i] == 'T'){   reverse += 'A'; }\r
2066             else if(oligo[i] == 'U'){   reverse += 'A'; }\r
2067             \r
2068             else if(oligo[i] == 'G'){   reverse += 'C'; }\r
2069             else if(oligo[i] == 'C'){   reverse += 'G'; }\r
2070             \r
2071             else if(oligo[i] == 'R'){   reverse += 'Y'; }\r
2072             else if(oligo[i] == 'Y'){   reverse += 'R'; }\r
2073             \r
2074             else if(oligo[i] == 'M'){   reverse += 'K'; }\r
2075             else if(oligo[i] == 'K'){   reverse += 'M'; }\r
2076             \r
2077             else if(oligo[i] == 'W'){   reverse += 'W'; }\r
2078             else if(oligo[i] == 'S'){   reverse += 'S'; }\r
2079             \r
2080             else if(oligo[i] == 'B'){   reverse += 'V'; }\r
2081             else if(oligo[i] == 'V'){   reverse += 'B'; }\r
2082             \r
2083             else if(oligo[i] == 'D'){   reverse += 'H'; }\r
2084             else if(oligo[i] == 'H'){   reverse += 'D'; }\r
2085             \r
2086             else                                                {       reverse += 'N'; }\r
2087         }\r
2088         \r
2089         \r
2090         return reverse;\r
2091     }\r
2092         catch(exception& e) {\r
2093                 m->errorOut(e, "SffInfoCommand", "reverseOligo");\r
2094                 exit(1);\r
2095         }\r
2096 }\r
2097 \r
2098 //**********************************************************************************************************************\r
2099 \r
2100 \r
2101                                 \r
2102                                 \r