]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / sffinfocommand.cpp
1 /*\r
2  *  sffinfocommand.cpp\r
3  *  Mothur\r
4  *\r
5  *  Created by westcott on 7/7/10.\r
6  *  Copyright 2010 Schloss Lab. All rights reserved.\r
7  *\r
8  */\r
9 \r
10 #include "sffinfocommand.h"\r
11 #include "endiannessmacros.h"\r
12 #include "trimoligos.h"\r
13 #include "sequence.hpp"\r
14 #include "qualityscores.h"\r
15 \r
16 //**********************************************************************************************************************\r
17 vector<string> SffInfoCommand::setParameters(){ \r
18         try {           \r
19                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
20         CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
21         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);\r
22         CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
23                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
24                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
25                 CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
26                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);\r
27                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);\r
28                 CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);\r
29         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);\r
30                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);\r
31         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);\r
32                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);\r
33         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);\r
34                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);\r
35                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);\r
36                 \r
37                 vector<string> myArray;\r
38                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }\r
39                 return myArray;\r
40         }\r
41         catch(exception& e) {\r
42                 m->errorOut(e, "SffInfoCommand", "setParameters");\r
43                 exit(1);\r
44         }\r
45 }\r
46 //**********************************************************************************************************************\r
47 string SffInfoCommand::getHelpString(){ \r
48         try {\r
49                 string helpString = "";\r
50                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
51                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n";\r
52                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
53                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
54                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
55         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
56         helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n";\r
57         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
58                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
59                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
60         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
61                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
62         helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";\r
63                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";\r
64                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";\r
65                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
66                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";\r
67                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";\r
68                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";\r
69                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";\r
70                 return helpString;\r
71         }\r
72         catch(exception& e) {\r
73                 m->errorOut(e, "SffInfoCommand", "getHelpString");\r
74                 exit(1);\r
75         }\r
76 }\r
77 \r
78 //**********************************************************************************************************************\r
79 string SffInfoCommand::getOutputPattern(string type) {\r
80     try {\r
81         string pattern = "";\r
82         \r
83         if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }\r
84         else if (type == "flow")    {   pattern =  "[filename],flow";   }\r
85         else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }\r
86         else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }\r
87         else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }\r
88         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }\r
89         \r
90         return pattern;\r
91     }\r
92     catch(exception& e) {\r
93         m->errorOut(e, "SffInfoCommand", "getOutputPattern");\r
94         exit(1);\r
95     }\r
96 }\r
97 //**********************************************************************************************************************\r
98 SffInfoCommand::SffInfoCommand(){       \r
99         try {\r
100                 abort = true; calledHelp = true; \r
101                 setParameters();\r
102                 vector<string> tempOutNames;\r
103                 outputTypes["fasta"] = tempOutNames;\r
104                 outputTypes["flow"] = tempOutNames;\r
105                 outputTypes["sfftxt"] = tempOutNames;\r
106                 outputTypes["qfile"] = tempOutNames;\r
107         outputTypes["sff"] = tempOutNames;\r
108         }\r
109         catch(exception& e) {\r
110                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
111                 exit(1);\r
112         }\r
113 }\r
114 //**********************************************************************************************************************\r
115 \r
116 SffInfoCommand::SffInfoCommand(string option)  {\r
117         try {\r
118                 abort = false; calledHelp = false;   \r
119                 hasAccnos = false; hasOligos = false; hasGroup = false;\r
120         split = 1;\r
121                 \r
122                 //allow user to run help\r
123                 if(option == "help") { help(); abort = true; calledHelp = true; }\r
124                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}\r
125                 \r
126                 else {\r
127                         //valid paramters for this command\r
128                         vector<string> myArray = setParameters();\r
129                         \r
130                         OptionParser parser(option);\r
131                         map<string, string> parameters = parser.getParameters();\r
132                         \r
133                         ValidParameters validParameter;\r
134                         //check to make sure all parameters are valid for command\r
135                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { \r
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }\r
137                         }\r
138                         \r
139                         //initialize outputTypes\r
140                         vector<string> tempOutNames;\r
141                         outputTypes["fasta"] = tempOutNames;\r
142                         outputTypes["flow"] = tempOutNames;\r
143                         outputTypes["sfftxt"] = tempOutNames;\r
144                         outputTypes["qfile"] = tempOutNames;\r
145             outputTypes["sff"] = tempOutNames;\r
146                         \r
147                         //if the user changes the output directory command factory will send this info to us in the output parameter \r
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }\r
149                         \r
150                         //if the user changes the input directory command factory will send this info to us in the output parameter \r
151                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }\r
152 \r
153                         sffFilename = validParameter.validFile(parameters, "sff", false);\r
154                         if (sffFilename == "not found") { sffFilename = "";  }\r
155                         else { \r
156                                 m->splitAtDash(sffFilename, filenames);\r
157                                 \r
158                                 //go through files and make sure they are good, if not, then disregard them\r
159                                 for (int i = 0; i < filenames.size(); i++) {\r
160                                         bool ignore = false;\r
161                                         if (filenames[i] == "current") { \r
162                                                 filenames[i] = m->getSFFFile(); \r
163                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }\r
164                                                 else {  \r
165                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
166                                                         //erase from file list\r
167                                                         filenames.erase(filenames.begin()+i);\r
168                                                         i--;\r
169                                                 }\r
170                                         }\r
171                                         \r
172                                         if (!ignore) {\r
173                                                 if (inputDir != "") {\r
174                                                         string path = m->hasPath(filenames[i]);\r
175                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
176                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }\r
177                                                 }\r
178                 \r
179                                                 ifstream in;\r
180                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");\r
181                                         \r
182                                                 //if you can't open it, try default location\r
183                                                 if (ableToOpen == 1) {\r
184                                                         if (m->getDefaultPath() != "") { //default path is set\r
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);\r
186                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
187                                                                 ifstream in2;\r
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
189                                                                 in2.close();\r
190                                                                 filenames[i] = tryPath;\r
191                                                         }\r
192                                                 }\r
193                                                 \r
194                                                 //if you can't open it, try default location\r
195                                                 if (ableToOpen == 1) {\r
196                                                         if (m->getOutputDir() != "") { //default path is set\r
197                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);\r
198                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
199                                                                 ifstream in2;\r
200                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
201                                                                 in2.close();\r
202                                                                 filenames[i] = tryPath;\r
203                                                         }\r
204                                                 }\r
205                                                 \r
206                                                 in.close();\r
207                                                 \r
208                                                 if (ableToOpen == 1) { \r
209                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
210                                                         //erase from file list\r
211                                                         filenames.erase(filenames.begin()+i);\r
212                                                         i--;\r
213                                                 }else { m->setSFFFile(filenames[i]); }\r
214                                         }\r
215                                 }\r
216                                 \r
217                                 //make sure there is at least one valid file left\r
218                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
219                         }\r
220                         \r
221                         accnosName = validParameter.validFile(parameters, "accnos", false);\r
222                         if (accnosName == "not found") { accnosName = "";  }\r
223                         else { \r
224                                 hasAccnos = true;\r
225                                 m->splitAtDash(accnosName, accnosFileNames);\r
226                                 \r
227                                 //go through files and make sure they are good, if not, then disregard them\r
228                                 for (int i = 0; i < accnosFileNames.size(); i++) {\r
229                                         bool ignore = false;\r
230                                         if (accnosFileNames[i] == "current") { \r
231                                                 accnosFileNames[i] = m->getAccnosFile(); \r
232                                                 if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
233                                                 else {  \r
234                                                         m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
235                                                         //erase from file list\r
236                                                         accnosFileNames.erase(accnosFileNames.begin()+i);\r
237                                                         i--;\r
238                                                 }\r
239                                         }\r
240                                         \r
241                                         if (!ignore) {\r
242                                         \r
243                                                 if (inputDir != "") {\r
244                                                         string path = m->hasPath(accnosFileNames[i]);\r
245                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
246                                                         if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }\r
247                                                 }\r
248                 \r
249                                                 ifstream in;\r
250                                                 int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");\r
251                                         \r
252                                                 //if you can't open it, try default location\r
253                                                 if (ableToOpen == 1) {\r
254                                                         if (m->getDefaultPath() != "") { //default path is set\r
255                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);\r
256                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
257                                                                 ifstream in2;\r
258                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
259                                                                 in2.close();\r
260                                                                 accnosFileNames[i] = tryPath;\r
261                                                         }\r
262                                                 }\r
263                                                 //if you can't open it, try default location\r
264                                                 if (ableToOpen == 1) {\r
265                                                         if (m->getOutputDir() != "") { //default path is set\r
266                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);\r
267                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
268                                                                 ifstream in2;\r
269                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
270                                                                 in2.close();\r
271                                                                 accnosFileNames[i] = tryPath;\r
272                                                         }\r
273                                                 }\r
274                                                 in.close();\r
275                                                 \r
276                                                 if (ableToOpen == 1) { \r
277                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
278                                                         //erase from file list\r
279                                                         accnosFileNames.erase(accnosFileNames.begin()+i);\r
280                                                         i--;\r
281                                                 }\r
282                                         }\r
283                                 }\r
284                                 \r
285                                 //make sure there is at least one valid file left\r
286                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
287                         }\r
288             \r
289             oligosfile = validParameter.validFile(parameters, "oligos", false);\r
290                         if (oligosfile == "not found") { oligosfile = "";  }\r
291                         else { \r
292                                 hasOligos = true;\r
293                                 m->splitAtDash(oligosfile, oligosFileNames);\r
294                                 \r
295                                 //go through files and make sure they are good, if not, then disregard them\r
296                                 for (int i = 0; i < oligosFileNames.size(); i++) {\r
297                                         bool ignore = false;\r
298                                         if (oligosFileNames[i] == "current") { \r
299                                                 oligosFileNames[i] = m->getOligosFile(); \r
300                                                 if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
301                                                 else {  \r
302                                                         m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
303                                                         //erase from file list\r
304                                                         oligosFileNames.erase(oligosFileNames.begin()+i);\r
305                                                         i--;\r
306                                                 }\r
307                                         }\r
308                                         \r
309                                         if (!ignore) {\r
310                         \r
311                                                 if (inputDir != "") {\r
312                                                         string path = m->hasPath(oligosFileNames[i]);\r
313                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
314                                                         if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }\r
315                                                 }\r
316                         \r
317                                                 ifstream in;\r
318                                                 int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");\r
319                         \r
320                                                 //if you can't open it, try default location\r
321                                                 if (ableToOpen == 1) {\r
322                                                         if (m->getDefaultPath() != "") { //default path is set\r
323                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);\r
324                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
325                                                                 ifstream in2;\r
326                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
327                                                                 in2.close();\r
328                                                                 oligosFileNames[i] = tryPath;\r
329                                                         }\r
330                                                 }\r
331                                                 //if you can't open it, try default location\r
332                                                 if (ableToOpen == 1) {\r
333                                                         if (m->getOutputDir() != "") { //default path is set\r
334                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);\r
335                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
336                                                                 ifstream in2;\r
337                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
338                                                                 in2.close();\r
339                                                                 oligosFileNames[i] = tryPath;\r
340                                                         }\r
341                                                 }\r
342                                                 in.close();\r
343                                                 \r
344                                                 if (ableToOpen == 1) { \r
345                                                         m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
346                                                         //erase from file list\r
347                                                         oligosFileNames.erase(oligosFileNames.begin()+i);\r
348                                                         i--;\r
349                                                 }\r
350                                         }\r
351                                 }\r
352                                 \r
353                                 //make sure there is at least one valid file left\r
354                                 if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
355                         }\r
356             \r
357             groupfile = validParameter.validFile(parameters, "group", false);\r
358                         if (groupfile == "not found") { groupfile = "";  }\r
359                         else {\r
360                                 hasGroup = true;\r
361                                 m->splitAtDash(groupfile, groupFileNames);\r
362                                 \r
363                                 //go through files and make sure they are good, if not, then disregard them\r
364                                 for (int i = 0; i < groupFileNames.size(); i++) {\r
365                                         bool ignore = false;\r
366                                         if (groupFileNames[i] == "current") {\r
367                                                 groupFileNames[i] = m->getGroupFile();\r
368                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
369                                                 else {\r
370                                                         m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
371                                                         //erase from file list\r
372                                                         groupFileNames.erase(groupFileNames.begin()+i);\r
373                                                         i--;\r
374                                                 }\r
375                                         }\r
376                                         \r
377                                         if (!ignore) {\r
378                         \r
379                                                 if (inputDir != "") {\r
380                                                         string path = m->hasPath(groupFileNames[i]);\r
381                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
382                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }\r
383                                                 }\r
384                         \r
385                                                 ifstream in;\r
386                                                 int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
387                         \r
388                                                 //if you can't open it, try default location\r
389                                                 if (ableToOpen == 1) {\r
390                                                         if (m->getDefaultPath() != "") { //default path is set\r
391                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
392                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
393                                                                 ifstream in2;\r
394                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
395                                                                 in2.close();\r
396                                                                 groupFileNames[i] = tryPath;\r
397                                                         }\r
398                                                 }\r
399                                                 //if you can't open it, try default location\r
400                                                 if (ableToOpen == 1) {\r
401                                                         if (m->getOutputDir() != "") { //default path is set\r
402                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
403                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
404                                                                 ifstream in2;\r
405                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
406                                                                 in2.close();\r
407                                                                 groupFileNames[i] = tryPath;\r
408                                                         }\r
409                                                 }\r
410                                                 in.close();\r
411                                                 \r
412                                                 if (ableToOpen == 1) {\r
413                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
414                                                         //erase from file list\r
415                                                         groupFileNames.erase(groupFileNames.begin()+i);\r
416                                                         i--;\r
417                                                 }\r
418                                         }\r
419                                 }\r
420                                 \r
421                                 //make sure there is at least one valid file left\r
422                                 if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
423                         }\r
424 \r
425                         if (hasGroup) {\r
426                 split = 2;\r
427                                 if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
428                         }\r
429             \r
430             if (hasOligos) {\r
431                 split = 2;\r
432                                 if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
433                         }\r
434             \r
435             if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
436             \r
437                         if (hasAccnos) {\r
438                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
439                         }\r
440                         \r
441                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }\r
442                         qual = m->isTrue(temp); \r
443                         \r
444                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }\r
445                         fasta = m->isTrue(temp); \r
446                         \r
447                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }\r
448                         flow = m->isTrue(temp); \r
449                         \r
450                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }\r
451                         trim = m->isTrue(temp); \r
452             \r
453             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }\r
454                         m->mothurConvert(temp, bdiffs);\r
455                         \r
456                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }\r
457                         m->mothurConvert(temp, pdiffs);\r
458             \r
459             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }\r
460                         m->mothurConvert(temp, ldiffs);\r
461             \r
462             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }\r
463                         m->mothurConvert(temp, sdiffs);\r
464                         \r
465                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }\r
466                         m->mothurConvert(temp, tdiffs);\r
467                         \r
468                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }\r
469             \r
470                         temp = validParameter.validFile(parameters, "sfftxt", false);                           \r
471                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }\r
472                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }\r
473                         else {\r
474                                 //you are a filename\r
475                                 if (inputDir != "") {\r
476                                         map<string,string>::iterator it = parameters.find("sfftxt");\r
477                                         //user has given a template file\r
478                                         if(it != parameters.end()){ \r
479                                                 string path = m->hasPath(it->second);\r
480                                                 //if the user has not given a path then, add inputdir. else leave path alone.\r
481                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }\r
482                                         }\r
483                                 }\r
484                                 \r
485                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);\r
486                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }\r
487                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }\r
488                         }\r
489                         \r
490                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  \r
491                                 //if there is a current sff file, use it\r
492                                 string filename = m->getSFFFile(); \r
493                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }\r
494                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }\r
495                         }\r
496             \r
497             temp = validParameter.validFile(parameters, "checkorient", false);          if (temp == "not found") { temp = "F"; }\r
498                         reorient = m->isTrue(temp);\r
499             \r
500                 }\r
501         }\r
502         catch(exception& e) {\r
503                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
504                 exit(1);\r
505         }\r
506 }\r
507 //**********************************************************************************************************************\r
508 int SffInfoCommand::execute(){\r
509         try {\r
510                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }\r
511                 \r
512                 for (int s = 0; s < filenames.size(); s++) {\r
513                         \r
514                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
515                         \r
516                         int start = time(NULL);\r
517                         \r
518             filenames[s] = m->getFullPathName(filenames[s]);\r
519                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();\r
520                         \r
521                         string accnos = "";\r
522                         if (hasAccnos) { accnos = accnosFileNames[s]; }\r
523             \r
524             string oligos = "";\r
525             if (hasOligos) { oligos = oligosFileNames[s]; }\r
526             if (hasGroup) { oligos = groupFileNames[s]; }\r
527             \r
528                         int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
529 \r
530                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
531                 }\r
532                 \r
533                 if (sfftxtFilename != "") {  parseSffTxt(); }\r
534                 \r
535                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
536                 \r
537                 //set fasta file as new current fastafile\r
538                 string current = "";\r
539                 itTypes = outputTypes.find("fasta");\r
540                 if (itTypes != outputTypes.end()) {\r
541                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }\r
542                 }\r
543                 \r
544                 itTypes = outputTypes.find("qfile");\r
545                 if (itTypes != outputTypes.end()) {\r
546                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }\r
547                 }\r
548                 \r
549                 itTypes = outputTypes.find("flow");\r
550                 if (itTypes != outputTypes.end()) {\r
551                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }\r
552                 }\r
553                 \r
554                 //report output filenames\r
555                 m->mothurOutEndLine();\r
556                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();\r
557                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }\r
558                 m->mothurOutEndLine();\r
559 \r
560                 return 0;\r
561         }\r
562         catch(exception& e) {\r
563                 m->errorOut(e, "SffInfoCommand", "execute");\r
564                 exit(1);\r
565         }\r
566 }\r
567 //**********************************************************************************************************************\r
568 int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
569         try {\r
570         oligosObject = new Oligos();\r
571                 currentFileName = input;\r
572                 if (outputDir == "") {  outputDir += m->hasPath(input); }\r
573                 \r
574                 if (accnos != "")       {  readAccnosFile(accnos);  }\r
575                 else                            {       seqNames.clear();               }\r
576         \r
577         TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;\r
578         if (hasOligos)   {\r
579             readOligos(oligos);    split = 2;\r
580             if (m->control_pressed) { delete oligosObject; return 0; }\r
581             trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers());  numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size();\r
582             if (reorient) {\r
583                 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size();\r
584             }\r
585         }\r
586         if (hasGroup)    {   readGroup(oligos);     split = 2;      }\r
587         \r
588                 ofstream outSfftxt, outFasta, outQual, outFlow;\r
589                 string outFastaFileName, outQualFileName;\r
590         string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
591         if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }\r
592         \r
593         map<string, string> variables; \r
594                 variables["[filename]"] = rootName;\r
595                 string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
596                 string outFlowFileName = getOutputFileName("flow",variables);\r
597                 if (!trim) { variables["[tag]"] = "raw"; }\r
598                 outFastaFileName = getOutputFileName("fasta",variables);\r
599         outQualFileName = getOutputFileName("qfile",variables);\r
600         \r
601                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }\r
602                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
603                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
604                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
605                 \r
606                 ifstream in;\r
607                 m->openInputFileBinary(input, in);\r
608                 \r
609                 CommonHeader header;\r
610                 readCommonHeader(in, header);\r
611         \r
612                 int count = 0;\r
613                 \r
614                 //check magic number and version\r
615                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos)   { delete trimOligos; if (reorient) { delete  rtrimOligos; } } return count; }\r
616                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos)   { delete trimOligos; if (reorient) { delete  rtrimOligos; } } return count; }\r
617         \r
618                 //print common header\r
619                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
620                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }\r
621                 \r
622                 //read through the sff file\r
623                 while (!in.eof()) {\r
624                         \r
625                         bool print = true;\r
626                                                 \r
627                         //read data\r
628                         seqRead read;  Header readheader;\r
629             readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos);\r
630             \r
631             bool okay = sanityCheck(readheader, read);\r
632             if (!okay) { break; }\r
633             \r
634                         //if you have provided an accosfile and this seq is not in it, then dont print\r
635                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }\r
636                         \r
637                         //print \r
638                         if (print) {\r
639                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }\r
640                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }\r
641                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }\r
642                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }\r
643                         }\r
644                         \r
645                         count++;\r
646             \r
647                         //report progress\r
648                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }\r
649                 \r
650                         if (m->control_pressed) { count = 0; break;   }\r
651                         \r
652                         if (count >= header.numReads) { break; }\r
653                 }\r
654                 \r
655                 //report progress\r
656                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }\r
657                 \r
658                 in.close();\r
659                 \r
660                 if (sfftxt) {  outSfftxt.close();       }\r
661                 if (fasta)      {  outFasta.close();    }\r
662                 if (qual)       {  outQual.close();             }\r
663                 if (flow)       {  outFlow.close();             }\r
664                 \r
665         if (split > 1) {\r
666             //create new common headers for each file with the correct number of reads\r
667             adjustCommonHeader(header);\r
668             \r
669             if (hasGroup) { delete groupMap; }\r
670             \r
671             //cout << "here" << endl;\r
672                         map<string, string>::iterator it;\r
673                         set<string> namesToRemove;\r
674                         for(int i=0;i<filehandles.size();i++){\r
675                                 for(int j=0;j<filehandles[0].size();j++){\r
676                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << endl;\r
677                                         if (filehandles[i][j] != "") {\r
678                                                 if (namesToRemove.count(filehandles[i][j]) == 0) {\r
679                                                         if(m->isBlank(filehandles[i][j])){\r
680                                 //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
681                                                                 m->mothurRemove(filehandles[i][j]);\r
682                                 m->mothurRemove(filehandlesHeaders[i][j]);\r
683                                                                 namesToRemove.insert(filehandles[i][j]);\r
684                             }\r
685                                                 }\r
686                                         }\r
687                                 }\r
688                         }\r
689             //cout << "here2" << endl;\r
690             //append new header to reads\r
691             for (int i = 0; i < filehandles.size(); i++) {\r
692                 for (int j = 0; j < filehandles[i].size(); j++) {\r
693                     if (filehandles[i][j] != "") {\r
694                         m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
695                         m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
696                         m->mothurRemove(filehandlesHeaders[i][j]);\r
697                         //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
698                         if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
699                     }\r
700                 }\r
701             }\r
702                         //cout << "here3" << endl;\r
703                         //remove names for outputFileNames, just cleans up the output\r
704                         for(int i = 0; i < outputNames.size(); i++) { \r
705                 if (namesToRemove.count(outputNames[i]) != 0) {\r
706                     //cout << "erasing " << i << '\t' << outputNames[i] << endl;\r
707                     outputNames.erase(outputNames.begin()+i);\r
708                     i--;\r
709                 } \r
710             }\r
711             //cout << "here4" << endl;\r
712             if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }\r
713             else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
714         }\r
715         \r
716         delete oligosObject;\r
717         if (hasOligos)   { delete trimOligos; if (reorient) { delete  rtrimOligos; } }\r
718         \r
719                 return count;\r
720         }\r
721         catch(exception& e) {\r
722                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");\r
723                 exit(1);\r
724         }\r
725 }\r
726 //**********************************************************************************************************************\r
727 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){\r
728         try {\r
729         \r
730                 if (!in.eof()) {\r
731 \r
732                         //read magic number\r
733                         char buffer[4];\r
734                         in.read(buffer, 4);\r
735                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));\r
736             \r
737                         //read version\r
738                         char buffer9[4];\r
739                         in.read(buffer9, 4);\r
740                         header.version = "";\r
741                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }\r
742     \r
743                         //read offset\r
744                         char buffer2 [8];\r
745                         in.read(buffer2, 8);\r
746                         header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));\r
747                         \r
748                         //read index length\r
749                         char buffer3 [4];\r
750                         in.read(buffer3, 4);\r
751                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));\r
752             \r
753                         //read num reads\r
754                         char buffer4 [4];\r
755                         in.read(buffer4, 4);\r
756                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));\r
757             \r
758             if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }\r
759                                 \r
760                         //read header length\r
761                         char buffer5 [2];\r
762                         in.read(buffer5, 2);\r
763                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));\r
764                                         \r
765                         //read key length\r
766                         char buffer6 [2];\r
767                         in.read(buffer6, 2);\r
768                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));\r
769                         \r
770                         //read number of flow reads\r
771                         char buffer7 [2];\r
772                         in.read(buffer7, 2);\r
773                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));\r
774                                 \r
775                         //read format code\r
776                         char buffer8 [1];\r
777                         in.read(buffer8, 1);\r
778                         header.flogramFormatCode = (int)(buffer8[0]);\r
779                         \r
780                         //read flow chars\r
781                         char* tempBuffer = new char[header.numFlowsPerRead];\r
782                         in.read(&(*tempBuffer), header.numFlowsPerRead); \r
783                         header.flowChars = tempBuffer;\r
784                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }\r
785                         delete[] tempBuffer;\r
786                         \r
787                         //read key\r
788                         char* tempBuffer2 = new char[header.keyLength];\r
789                         in.read(&(*tempBuffer2), header.keyLength);\r
790                         header.keySequence = tempBuffer2;\r
791                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }\r
792                         delete[] tempBuffer2;\r
793                         \r
794                         /* Pad to 8 chars */\r
795                         unsigned long long spotInFile = in.tellg();\r
796                         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
797                         in.seekg(spot);\r
798             \r
799         }else{\r
800                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();\r
801                 }\r
802         \r
803                 return 0;\r
804         \r
805         }\r
806         catch(exception& e) {\r
807                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");\r
808                 exit(1);\r
809         }\r
810 }\r
811 //**********************************************************************************************************************\r
812 int SffInfoCommand::adjustCommonHeader(CommonHeader header){\r
813         try {\r
814         string endian = m->findEdianness();\r
815         char* mybuffer = new char[4];\r
816         ifstream in;\r
817         m->openInputFileBinary(currentFileName, in);\r
818         \r
819         ofstream outNoMatchHeader;\r
820         string tempNoHeader = "tempNoMatchHeader";\r
821         m->openOutputFileBinary(tempNoHeader, outNoMatchHeader);\r
822         \r
823         //magic number\r
824         in.read(mybuffer,4);\r
825         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
826             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
827                 ofstream out;\r
828                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
829                 out.write(mybuffer, in.gcount());\r
830                 out.close();\r
831             }\r
832         }\r
833         outNoMatchHeader.write(mybuffer, in.gcount());\r
834         delete[] mybuffer;\r
835         \r
836         //version\r
837         mybuffer = new char[4];\r
838         in.read(mybuffer,4);\r
839         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
840             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
841                 ofstream out;\r
842                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
843                 out.write(mybuffer, in.gcount());\r
844                 out.close();\r
845             }\r
846         }\r
847         outNoMatchHeader.write(mybuffer, in.gcount());\r
848         delete[] mybuffer;\r
849         \r
850         //offset\r
851         mybuffer = new char[8];\r
852         in.read(mybuffer,8);\r
853         unsigned long long offset = 0;\r
854         char* thisbuffer = new char[8];\r
855         thisbuffer[0] = (offset >> 56) & 0xFF;\r
856         thisbuffer[1] = (offset >> 48) & 0xFF;\r
857         thisbuffer[2] = (offset >> 40) & 0xFF;\r
858         thisbuffer[3] = (offset >> 32) & 0xFF;\r
859         thisbuffer[4] = (offset >> 24) & 0xFF;\r
860         thisbuffer[5] = (offset >> 16) & 0xFF;\r
861         thisbuffer[6] = (offset >> 8) & 0xFF;\r
862         thisbuffer[7] = offset & 0xFF;\r
863         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
864             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
865                 ofstream out;\r
866                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
867                 out.write(thisbuffer, 8);\r
868                 out.close();\r
869             }\r
870         }\r
871         outNoMatchHeader.write(thisbuffer, 8);\r
872         delete[] thisbuffer;\r
873         delete[] mybuffer;\r
874             \r
875                         \r
876         //read index length\r
877                 mybuffer = new char[4];\r
878         in.read(mybuffer,4);\r
879         offset = 0;\r
880         char* thisbuffer2 = new char[4];\r
881         thisbuffer2[0] = (offset >> 24) & 0xFF;\r
882         thisbuffer2[1] = (offset >> 16) & 0xFF;\r
883         thisbuffer2[2] = (offset >> 8) & 0xFF;\r
884         thisbuffer2[3] = offset & 0xFF;\r
885         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
886             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
887                 ofstream out;\r
888                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
889                 out.write(thisbuffer2, 4);\r
890                 out.close();\r
891             }\r
892         }\r
893         outNoMatchHeader.write(thisbuffer2, 4);\r
894         delete[] thisbuffer2;\r
895         delete[] mybuffer;\r
896                 \r
897         //change num reads\r
898         mybuffer = new char[4];\r
899         in.read(mybuffer,4);\r
900         delete[] mybuffer;\r
901         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
902             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
903                 char* thisbuffer = new char[4];\r
904                 if (endian == "BIG_ENDIAN") {\r
905                     thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;\r
906                     thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;\r
907                     thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;\r
908                     thisbuffer[3] = numSplitReads[i][j] & 0xFF;\r
909                 }else {\r
910                     thisbuffer[0] = numSplitReads[i][j] & 0xFF;\r
911                     thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF;\r
912                     thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
913                     thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
914                  }\r
915                 ofstream out;\r
916                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
917                 out.write(thisbuffer, 4);\r
918                 out.close();\r
919                 delete[] thisbuffer;\r
920             }\r
921         }\r
922         char* thisbuffer3 = new char[4];\r
923         if (endian == "BIG_ENDIAN") {\r
924             thisbuffer3[0] = (numNoMatch >> 24) & 0xFF;\r
925             thisbuffer3[1] = (numNoMatch >> 16) & 0xFF;\r
926             thisbuffer3[2] = (numNoMatch >> 8) & 0xFF;\r
927             thisbuffer3[3] = numNoMatch & 0xFF;\r
928         }else {\r
929             thisbuffer3[0] = numNoMatch & 0xFF;\r
930             thisbuffer3[1] = (numNoMatch >> 8) & 0xFF;\r
931             thisbuffer3[2] = (numNoMatch >> 16) & 0xFF;\r
932             thisbuffer3[3] = (numNoMatch >> 24) & 0xFF;\r
933         }\r
934         outNoMatchHeader.write(thisbuffer3, 4);\r
935         delete[] thisbuffer3;\r
936         \r
937         \r
938         //read header length\r
939         mybuffer = new char[2];\r
940         in.read(mybuffer,2);\r
941         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
942             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
943                 ofstream out;\r
944                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
945                 out.write(mybuffer, in.gcount());\r
946                 out.close();\r
947             }\r
948         }\r
949         outNoMatchHeader.write(mybuffer, in.gcount());\r
950         delete[] mybuffer;\r
951             \r
952         //read key length\r
953         mybuffer = new char[2];\r
954         in.read(mybuffer,2);\r
955         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
956             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
957                 ofstream out;\r
958                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
959                 out.write(mybuffer, in.gcount());\r
960                 out.close();\r
961             }\r
962         }\r
963         outNoMatchHeader.write(mybuffer, in.gcount());\r
964         delete[] mybuffer;\r
965                         \r
966         //read number of flow reads\r
967         mybuffer = new char[2];\r
968         in.read(mybuffer,2);\r
969         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
970             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
971                 ofstream out;\r
972                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
973                 out.write(mybuffer, in.gcount());\r
974                 out.close();\r
975             }\r
976         }\r
977         outNoMatchHeader.write(mybuffer, in.gcount());\r
978         delete[] mybuffer;\r
979             \r
980         //read format code\r
981         mybuffer = new char[1];\r
982         in.read(mybuffer,1);\r
983         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
984             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
985                 ofstream out;\r
986                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
987                 out.write(mybuffer, in.gcount());\r
988                 out.close();\r
989             }\r
990         }\r
991         outNoMatchHeader.write(mybuffer, in.gcount());\r
992         delete[] mybuffer;\r
993                         \r
994         //read flow chars\r
995         mybuffer = new char[header.numFlowsPerRead];\r
996         in.read(mybuffer,header.numFlowsPerRead);\r
997         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
998             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
999                 ofstream out;\r
1000                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
1001                 out.write(mybuffer, in.gcount());\r
1002                 out.close();\r
1003             }\r
1004         }\r
1005         outNoMatchHeader.write(mybuffer, in.gcount());\r
1006         delete[] mybuffer;\r
1007                         \r
1008         //read key\r
1009         mybuffer = new char[header.keyLength];\r
1010         in.read(mybuffer,header.keyLength);\r
1011         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
1012             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
1013                 ofstream out;\r
1014                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
1015                 out.write(mybuffer, in.gcount());\r
1016                 out.close();\r
1017             }\r
1018         }\r
1019         outNoMatchHeader.write(mybuffer, in.gcount());\r
1020         delete[] mybuffer;\r
1021         \r
1022                         \r
1023         /* Pad to 8 chars */\r
1024         unsigned long long spotInFile = in.tellg();\r
1025         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
1026         in.seekg(spot);\r
1027         \r
1028         mybuffer = new char[spot-spotInFile];\r
1029         for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
1030             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
1031                 ofstream out;\r
1032                 m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
1033                 out.write(mybuffer, spot-spotInFile);\r
1034                 out.close();\r
1035             }\r
1036         }\r
1037         outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
1038         outNoMatchHeader.close();\r
1039         delete[] mybuffer;\r
1040         in.close();\r
1041         \r
1042         m->appendBinaryFiles(noMatchFile, tempNoHeader);\r
1043         m->renameFile(tempNoHeader, noMatchFile);\r
1044         m->mothurRemove(tempNoHeader);\r
1045         \r
1046                 return 0;\r
1047         \r
1048         }\r
1049         catch(exception& e) {\r
1050                 m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");\r
1051                 exit(1);\r
1052         }\r
1053 }\r
1054 //**********************************************************************************************************************\r
1055 bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){\r
1056         try {\r
1057         unsigned long long startSpotInFile = in.tellg();\r
1058                 if (!in.eof()) {\r
1059             \r
1060             /*****************************************/\r
1061             //read header\r
1062             \r
1063             //read header length\r
1064                         char buffer [2];\r
1065                         in.read(buffer, 2); \r
1066                         header.headerLength = be_int2(*(unsigned short *)(&buffer));\r
1067             \r
1068                         //read name length\r
1069                         char buffer2 [2];\r
1070                         in.read(buffer2, 2);\r
1071                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));\r
1072             \r
1073                         //read num bases\r
1074                         char buffer3 [4];\r
1075                         in.read(buffer3, 4);\r
1076                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));\r
1077             \r
1078                         \r
1079                         //read clip qual left\r
1080                         char buffer4 [2];\r
1081                         in.read(buffer4, 2);\r
1082                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));\r
1083                         header.clipQualLeft = 5;\r
1084             \r
1085                         \r
1086                         //read clip qual right\r
1087                         char buffer5 [2];\r
1088                         in.read(buffer5, 2);\r
1089                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));\r
1090            \r
1091             \r
1092                         //read clipAdapterLeft\r
1093                         char buffer6 [2];\r
1094                         in.read(buffer6, 2);\r
1095                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));\r
1096             \r
1097             \r
1098                         //read clipAdapterRight\r
1099                         char buffer7 [2];\r
1100                         in.read(buffer7, 2);\r
1101                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));\r
1102             \r
1103             \r
1104                         //read name\r
1105                         char* tempBuffer = new char[header.nameLength];\r
1106                         in.read(&(*tempBuffer), header.nameLength);\r
1107                         header.name = tempBuffer;\r
1108                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }\r
1109             \r
1110                         delete[] tempBuffer;\r
1111                         \r
1112                         //extract info from name\r
1113                         decodeName(header.timestamp, header.region, header.xy, header.name);\r
1114                         \r
1115                         /* Pad to 8 chars */\r
1116                         unsigned long long spotInFile = in.tellg();\r
1117                         unsigned long long spot = (spotInFile + 7)& ~7;\r
1118                         in.seekg(spot);\r
1119 \r
1120             /*****************************************/\r
1121             //sequence read \r
1122             \r
1123                         //read flowgram\r
1124                         read.flowgram.resize(numFlowReads);\r
1125                         for (int i = 0; i < numFlowReads; i++) {  \r
1126                                 char buffer [2];\r
1127                                 in.read(buffer, 2);\r
1128                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));\r
1129                         }\r
1130             \r
1131                         //read flowIndex\r
1132                         read.flowIndex.resize(header.numBases);\r
1133                         for (int i = 0; i < header.numBases; i++) {  \r
1134                                 char temp[1];\r
1135                                 in.read(temp, 1);\r
1136                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));\r
1137                         }\r
1138         \r
1139                         //read bases\r
1140                         char* tempBuffer6 = new char[header.numBases];\r
1141                         in.read(&(*tempBuffer6), header.numBases);\r
1142                         read.bases = tempBuffer6;\r
1143                         if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }\r
1144                         delete[] tempBuffer6;\r
1145 \r
1146                         //read qual scores\r
1147                         read.qualScores.resize(header.numBases);\r
1148                         for (int i = 0; i < header.numBases; i++) {  \r
1149                                 char temp[1];\r
1150                                 in.read(temp, 1);\r
1151                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));\r
1152                         }\r
1153         \r
1154                         /* Pad to 8 chars */\r
1155                         spotInFile = in.tellg();\r
1156                         spot = (spotInFile + 7)& ~7;\r
1157                         in.seekg(spot);\r
1158             \r
1159             if (split > 1) { \r
1160                \r
1161                 int barcodeIndex, primerIndex, trashCodeLength;\r
1162                 \r
1163                 if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos);                }\r
1164                 else if (hasGroup)  {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode");   }\r
1165                 else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
1166 \r
1167                 char * mybuffer;\r
1168                 mybuffer = new char [spot-startSpotInFile];\r
1169                 \r
1170                 ifstream in2;\r
1171                 m->openInputFileBinary(currentFileName, in2);\r
1172                 in2.seekg(startSpotInFile);\r
1173                 in2.read(mybuffer,spot-startSpotInFile);\r
1174                 \r
1175                 \r
1176                 if(trashCodeLength == 0){\r
1177                     ofstream out;\r
1178                     m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
1179                     out.write(mybuffer, in2.gcount());\r
1180                     out.close();\r
1181                     numSplitReads[barcodeIndex][primerIndex]++;\r
1182                                 }\r
1183                                 else{\r
1184                                         ofstream out;\r
1185                     m->openOutputFileBinaryAppend(noMatchFile, out);\r
1186                     out.write(mybuffer, in2.gcount());\r
1187                     out.close();\r
1188                     numNoMatch++;\r
1189                                 }\r
1190                                 delete[] mybuffer;\r
1191                 in2.close();\r
1192         }    \r
1193             \r
1194                 }else{\r
1195                         m->mothurOut("Error reading."); m->mothurOutEndLine();\r
1196                 }\r
1197         \r
1198         if (in.eof()) {  return true; }\r
1199         \r
1200                 return false;\r
1201         }\r
1202         catch(exception& e) {\r
1203                 m->errorOut(e, "SffInfoCommand", "readSeqData");\r
1204                 exit(1);\r
1205         }\r
1206 }\r
1207 //**********************************************************************************************************************\r
1208 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) {\r
1209         try {\r
1210         \r
1211         int success = 1;\r
1212         string trashCode = "";\r
1213         int currentSeqsDiffs = 0;\r
1214         \r
1215         string seq = read.bases;\r
1216         \r
1217         if (trim) {\r
1218             if(header.clipQualRight < header.clipQualLeft){\r
1219                 if (header.clipQualRight == 0) { //don't trim right\r
1220                     seq = seq.substr(header.clipQualLeft-1);\r
1221                 }else {\r
1222                     seq = "NNNN";\r
1223                 }\r
1224             }\r
1225             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1226                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
1227             }\r
1228             else {\r
1229                 seq = seq.substr(header.clipQualLeft-1);\r
1230             }\r
1231         }else{\r
1232             //if you wanted the sfftxt then you already converted the bases to the right case\r
1233             if (!sfftxt) {\r
1234                 int endValue = header.clipQualRight;\r
1235                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1236                 if(endValue == 0){      endValue = seq.length();        }\r
1237                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
1238                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
1239                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
1240             }\r
1241         }\r
1242         \r
1243         Sequence currSeq(header.name, seq);\r
1244         QualityScores currQual;\r
1245         \r
1246         //for reorient\r
1247         Sequence savedSeq(currSeq.getName(), currSeq.getAligned());\r
1248         QualityScores savedQual(currQual.getName(), currQual.getScores());\r
1249         \r
1250         if(numLinkers != 0){\r
1251             success = trimOligos->stripLinker(currSeq, currQual);\r
1252             if(success > ldiffs)                {       trashCode += 'k';       }\r
1253             else{ currentSeqsDiffs += success;  }\r
1254             \r
1255         }\r
1256         \r
1257         if(numBarcodes != 0){\r
1258             success = trimOligos->stripBarcode(currSeq, currQual, barcode);\r
1259             if(success > bdiffs)                {       trashCode += 'b';       }\r
1260             else{ currentSeqsDiffs += success;  }\r
1261         }\r
1262         \r
1263         if(numSpacers != 0){\r
1264             success = trimOligos->stripSpacer(currSeq, currQual);\r
1265             if(success > sdiffs)                {       trashCode += 's';       }\r
1266             else{ currentSeqsDiffs += success;  }\r
1267             \r
1268         }\r
1269         \r
1270         if(numFPrimers != 0){\r
1271             success = trimOligos->stripForward(currSeq, currQual, primer, true);\r
1272             if(success > pdiffs)                {       trashCode += 'f';       }\r
1273             else{ currentSeqsDiffs += success;  }\r
1274         }\r
1275         \r
1276         if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }\r
1277         \r
1278         if(numRPrimers != 0){\r
1279             success = trimOligos->stripReverse(currSeq, currQual);\r
1280             if(!success)                                {       trashCode += 'r';       }\r
1281         }\r
1282 \r
1283         if (reorient && (trashCode != "")) { //if you failed and want to check the reverse\r
1284             int thisSuccess = 0;\r
1285             string thisTrashCode = "";\r
1286             int thisCurrentSeqsDiffs = 0;\r
1287             \r
1288             int thisBarcodeIndex = 0;\r
1289             int thisPrimerIndex = 0;\r
1290             //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
1291             if(numBarcodes != 0){\r
1292                 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);\r
1293                 if(thisSuccess > bdiffs)                { thisTrashCode += "b"; }\r
1294                 else{ thisCurrentSeqsDiffs += thisSuccess;  }\r
1295             }\r
1296             //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
1297             if(numFPrimers != 0){\r
1298                 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);\r
1299                 if(thisSuccess > pdiffs)                { thisTrashCode += "f"; }\r
1300                 else{ thisCurrentSeqsDiffs += thisSuccess;  }\r
1301             }\r
1302             \r
1303             if (thisCurrentSeqsDiffs > tdiffs)  {       thisTrashCode += 't';   }\r
1304             \r
1305             if (thisTrashCode == "") {\r
1306                 trashCode = thisTrashCode;\r
1307                 success = thisSuccess;\r
1308                 currentSeqsDiffs = thisCurrentSeqsDiffs;\r
1309                 barcode = thisBarcodeIndex;\r
1310                 primer = thisPrimerIndex;\r
1311                 savedSeq.reverseComplement();\r
1312                 currSeq.setAligned(savedSeq.getAligned());\r
1313                 savedQual.flipQScores();\r
1314                 currQual.setScores(savedQual.getScores());\r
1315             }else { trashCode += "(" + thisTrashCode + ")";  }\r
1316         }\r
1317 \r
1318         if (trashCode.length() == 0) { //is this sequence in the ignore group\r
1319             string thisGroup = oligosObject->getGroupName(barcode, primer);\r
1320             \r
1321             int pos = thisGroup.find("ignore");\r
1322             if (pos != string::npos) {  trashCode += "i"; }\r
1323         }\r
1324         \r
1325         return trashCode.length();\r
1326     }\r
1327         catch(exception& e) {\r
1328                 m->errorOut(e, "SffInfoCommand", "findGroup");\r
1329                 exit(1);\r
1330         }\r
1331 }\r
1332 //**********************************************************************************************************************\r
1333 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) {\r
1334         try {\r
1335         string trashCode = "";\r
1336         primer = 0;\r
1337         \r
1338         string group = groupMap->getGroup(header.name);\r
1339         if (group == "not found") {     trashCode += "g";   } //scrap for group\r
1340         \r
1341         return trashCode.length();\r
1342     }\r
1343         catch(exception& e) {\r
1344                 m->errorOut(e, "SffInfoCommand", "findGroup");\r
1345                 exit(1);\r
1346         }\r
1347 }\r
1348 //**********************************************************************************************************************\r
1349 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
1350         try {\r
1351                 \r
1352                 if (name.length() >= 6) {\r
1353                         string time = name.substr(0, 6);\r
1354                         unsigned int timeNum = m->fromBase36(time);\r
1355                         \r
1356                         int q1 = timeNum / 60;\r
1357                         int sec = timeNum - 60 * q1;\r
1358                         int q2 = q1 / 60;\r
1359                         int minute = q1 - 60 * q2;\r
1360                         int q3 = q2 / 24;\r
1361                         int hr = q2 - 24 * q3;\r
1362                         int q4 = q3 / 32;\r
1363                         int day = q3 - 32 * q4;\r
1364                         int q5 = q4 / 13;\r
1365                         int mon = q4 - 13 * q5;\r
1366                         int year = 2000 + q5;\r
1367                 \r
1368                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);\r
1369                 }\r
1370                 \r
1371                 if (name.length() >= 9) {\r
1372                         region = name.substr(7, 2);\r
1373                 \r
1374                         string xyNum = name.substr(9);\r
1375                         unsigned int myXy = m->fromBase36(xyNum);\r
1376                         int x = myXy >> 12;\r
1377                         int y = myXy & 4095;\r
1378                 \r
1379                         xy = toString(x) + "_" + toString(y);\r
1380                 }\r
1381                 \r
1382                 return 0;\r
1383         }\r
1384         catch(exception& e) {\r
1385                 m->errorOut(e, "SffInfoCommand", "decodeName");\r
1386                 exit(1);\r
1387         }\r
1388 }\r
1389 //**********************************************************************************************************************\r
1390 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {\r
1391         try {\r
1392         \r
1393                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;\r
1394                 out << "Version: " << header.version << endl;\r
1395                 out << "Index Offset: " << header.indexOffset << endl;\r
1396                 out << "Index Length: " << header.indexLength << endl;\r
1397                 out << "Number of Reads: " << header.numReads << endl;\r
1398                 out << "Header Length: " << header.headerLength << endl;\r
1399                 out << "Key Length: " << header.keyLength << endl;\r
1400                 out << "Number of Flows: " << header.numFlowsPerRead << endl;\r
1401                 out << "Format Code: " << header.flogramFormatCode << endl;\r
1402                 out << "Flow Chars: " << header.flowChars << endl;\r
1403                 out << "Key Sequence: " << header.keySequence << endl << endl;\r
1404                         \r
1405                 return 0;\r
1406         }\r
1407         catch(exception& e) {\r
1408                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");\r
1409                 exit(1);\r
1410         }\r
1411 }\r
1412 //**********************************************************************************************************************\r
1413 int SffInfoCommand::printHeader(ofstream& out, Header& header) {\r
1414         try {\r
1415                 \r
1416                 out << ">" << header.name << endl;\r
1417                 out << "Run Prefix: " << header.timestamp << endl;\r
1418                 out << "Region #:  " << header.region << endl;\r
1419                 out << "XY Location: " << header.xy << endl << endl;\r
1420                 \r
1421                 out << "Run Name:  " << endl;\r
1422                 out << "Analysis Name:  " << endl;\r
1423                 out << "Full Path: " << endl << endl;\r
1424                 \r
1425                 out << "Read Header Len: " << header.headerLength << endl;\r
1426                 out << "Name Length: " << header.nameLength << endl;\r
1427                 out << "# of Bases: " << header.numBases << endl;\r
1428                 out << "Clip Qual Left: " << header.clipQualLeft << endl;\r
1429                 out << "Clip Qual Right: " << header.clipQualRight << endl;\r
1430                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;\r
1431                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;\r
1432                 \r
1433                 return 0;\r
1434         }\r
1435         catch(exception& e) {\r
1436                 m->errorOut(e, "SffInfoCommand", "printHeader");\r
1437                 exit(1);\r
1438         }\r
1439 }\r
1440 //**********************************************************************************************************************\r
1441 bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {\r
1442         try {\r
1443         bool okay = true;\r
1444         string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";\r
1445         \r
1446         if (header.clipQualLeft > read.bases.length()) {\r
1447             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
1448         }\r
1449         if (header.clipQualRight > read.bases.length()) {\r
1450             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
1451         }\r
1452         if (header.clipQualLeft > read.qualScores.size()) {\r
1453             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
1454         }\r
1455         if (header.clipQualRight > read.qualScores.size()) {\r
1456             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
1457         }\r
1458         \r
1459         if (okay == false) {\r
1460             m->mothurOut(message); m->mothurOutEndLine();\r
1461         }\r
1462         \r
1463                 return okay;\r
1464         }\r
1465         catch(exception& e) {\r
1466                 m->errorOut(e, "SffInfoCommand", "sanityCheck");\r
1467                 exit(1);\r
1468         }\r
1469 }\r
1470 //**********************************************************************************************************************\r
1471 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {\r
1472         try {\r
1473                 out << "Flowgram: ";\r
1474                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }\r
1475                 \r
1476                 out << endl <<  "Flow Indexes: ";\r
1477                 int sum = 0;\r
1478                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }\r
1479                 \r
1480                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1481         int endValue = header.clipQualRight;\r
1482                 if(endValue == 0){      endValue = read.bases.length(); }\r
1483                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }\r
1484                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }\r
1485                 for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }\r
1486                 \r
1487                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";\r
1488                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
1489         \r
1490                 \r
1491                 out << endl << endl;\r
1492                 \r
1493                 return 0;\r
1494         }\r
1495         catch(exception& e) {\r
1496                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");\r
1497                 exit(1);\r
1498         }\r
1499 }\r
1500 //**********************************************************************************************************************\r
1501 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {\r
1502         try {\r
1503                 string seq = read.bases;\r
1504                 \r
1505         if (trim) {\r
1506                         if(header.clipQualRight < header.clipQualLeft){\r
1507                                 if (header.clipQualRight == 0) { //don't trim right\r
1508                     seq = seq.substr(header.clipQualLeft-1);\r
1509                 }else {\r
1510                     seq = "NNNN";\r
1511                 }\r
1512                         }\r
1513                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1514                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
1515                         }\r
1516                         else {\r
1517                                 seq = seq.substr(header.clipQualLeft-1);\r
1518                         }\r
1519                 }else{\r
1520                         //if you wanted the sfftxt then you already converted the bases to the right case\r
1521                         if (!sfftxt) {\r
1522                 int endValue = header.clipQualRight;\r
1523                                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1524                                 if(endValue == 0){      endValue = seq.length();        }\r
1525                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
1526                                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
1527                                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
1528                         }\r
1529                 }\r
1530                 \r
1531                 out << ">" << header.name  << " xy=" << header.xy << endl;\r
1532                 out << seq << endl;\r
1533                 \r
1534                 return 0;\r
1535         }\r
1536         catch(exception& e) {\r
1537                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");\r
1538                 exit(1);\r
1539         }\r
1540 }\r
1541 \r
1542 //**********************************************************************************************************************\r
1543 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {\r
1544         try {\r
1545                 \r
1546                 if (trim) {\r
1547                         if(header.clipQualRight < header.clipQualLeft){\r
1548                 if (header.clipQualRight == 0) { //don't trim right\r
1549                     out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;\r
1550                     for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';       }       \r
1551                 }else {\r
1552                     out << ">" << header.name << " xy=" << header.xy << endl;\r
1553                     out << "0\t0\t0\t0";\r
1554                 }\r
1555                         }\r
1556                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1557                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
1558                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }\r
1559                         }\r
1560                         else{\r
1561                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
1562                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       \r
1563                         }\r
1564                 }else{\r
1565                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;\r
1566                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
1567                 }\r
1568                 \r
1569                 out << endl;\r
1570                 \r
1571                 return 0;\r
1572         }\r
1573         catch(exception& e) {\r
1574                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");\r
1575                 exit(1);\r
1576         }\r
1577 }\r
1578 \r
1579 //**********************************************************************************************************************\r
1580 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {\r
1581         try {\r
1582         \r
1583         int endValue = header.clipQualRight;\r
1584         if (header.clipQualRight == 0) {\r
1585             endValue = read.flowIndex.size();\r
1586             if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }\r
1587         }\r
1588         if(endValue > header.clipQualLeft){\r
1589             \r
1590             int rightIndex = 0;\r
1591             for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];      }\r
1592             \r
1593             out << header.name << ' ' << rightIndex;\r
1594             for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }\r
1595             out << endl;\r
1596         }\r
1597                 \r
1598                 \r
1599                 return 0;\r
1600         }\r
1601         catch(exception& e) {\r
1602                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");\r
1603                 exit(1);\r
1604         }\r
1605 }\r
1606 //**********************************************************************************************************************\r
1607 int SffInfoCommand::readAccnosFile(string filename) {\r
1608         try {\r
1609                 //remove old names\r
1610                 seqNames.clear();\r
1611                 \r
1612                 ifstream in;\r
1613                 m->openInputFile(filename, in);\r
1614                 string name;\r
1615                 \r
1616                 while(!in.eof()){\r
1617                         in >> name; m->gobble(in);\r
1618                                                 \r
1619                         seqNames.insert(name);\r
1620                         \r
1621                         if (m->control_pressed) { seqNames.clear(); break; }\r
1622                 }\r
1623                 in.close();             \r
1624                 \r
1625                 return 0;\r
1626         }\r
1627         catch(exception& e) {\r
1628                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");\r
1629                 exit(1);\r
1630         }\r
1631 }\r
1632 //**********************************************************************************************************************\r
1633 int SffInfoCommand::parseSffTxt() {\r
1634         try {\r
1635                 \r
1636                 ifstream inSFF;\r
1637                 m->openInputFile(sfftxtFilename, inSFF);\r
1638                 \r
1639                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }\r
1640                 \r
1641                 //output file names\r
1642                 ofstream outFasta, outQual, outFlow;\r
1643                 string outFastaFileName, outQualFileName;\r
1644                 string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));\r
1645                 if (fileRoot.length() > 0) {\r
1646                         //rip off last .\r
1647                         fileRoot = fileRoot.substr(0, fileRoot.length()-1);\r
1648                         fileRoot = m->getRootName(fileRoot);\r
1649                 }\r
1650                 \r
1651         map<string, string> variables; \r
1652                 variables["[filename]"] = fileRoot;\r
1653                 string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
1654                 string outFlowFileName = getOutputFileName("flow",variables);\r
1655                 if (!trim) { variables["[tag]"] = "raw"; }\r
1656                 outFastaFileName = getOutputFileName("fasta",variables);\r
1657         outQualFileName = getOutputFileName("qfile",variables);\r
1658                 \r
1659                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
1660                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
1661                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
1662                 \r
1663                 //read common header\r
1664                 string commonHeader = m->getline(inSFF);\r
1665                 string magicNumber = m->getline(inSFF); \r
1666                 string version = m->getline(inSFF);\r
1667                 string indexOffset = m->getline(inSFF);\r
1668                 string indexLength = m->getline(inSFF);\r
1669                 int numReads = parseHeaderLineToInt(inSFF);\r
1670                 string headerLength = m->getline(inSFF);\r
1671                 string keyLength = m->getline(inSFF);\r
1672                 int numFlows = parseHeaderLineToInt(inSFF);\r
1673                 string flowgramCode = m->getline(inSFF);\r
1674                 string flowChars = m->getline(inSFF);\r
1675                 string keySequence = m->getline(inSFF);\r
1676                 m->gobble(inSFF);\r
1677                 \r
1678                 string seqName;\r
1679                 \r
1680                 if (flow)       {       outFlow << numFlows << endl;    }\r
1681                 \r
1682                 for(int i=0;i<numReads;i++){\r
1683                         \r
1684                         //sanity check\r
1685                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }\r
1686                         \r
1687                         Header header;\r
1688                         \r
1689                         //parse read header\r
1690                         inSFF >> seqName;\r
1691                         seqName = seqName.substr(1);\r
1692                         m->gobble(inSFF);\r
1693                         header.name = seqName;\r
1694                         \r
1695                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;\r
1696                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;\r
1697                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;\r
1698                         m->gobble(inSFF);\r
1699                                 \r
1700                         string runName = parseHeaderLineToString(inSFF);\r
1701                         string analysisName = parseHeaderLineToString(inSFF);\r
1702                         string fullPath = parseHeaderLineToString(inSFF);\r
1703                         m->gobble(inSFF);\r
1704                         \r
1705                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);\r
1706                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);\r
1707                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;\r
1708                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);\r
1709                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;\r
1710                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);\r
1711                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);\r
1712                         m->gobble(inSFF);\r
1713                                 \r
1714                         seqRead read;\r
1715                         \r
1716                         //parse read\r
1717                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;\r
1718                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); \r
1719                         \r
1720                         //adjust for print\r
1721                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);\r
1722                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }\r
1723                         read.flowIndex = flowIndicesAdjusted;\r
1724                         \r
1725                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;\r
1726                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;\r
1727                         m->gobble(inSFF);\r
1728                                         \r
1729                         //if you have provided an accosfile and this seq is not in it, then dont print\r
1730                         bool print = true;\r
1731                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }\r
1732                         \r
1733                         //print \r
1734                         if (print) {\r
1735                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }\r
1736                                 if (qual)       {       printQualSeqData(outQual, read, header);        }\r
1737                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }\r
1738                         }\r
1739                         \r
1740                         //report progress\r
1741                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }\r
1742                         \r
1743                         if (m->control_pressed) {  break;  }\r
1744                 }\r
1745                 \r
1746                 //report progress\r
1747                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }\r
1748                 \r
1749                 inSFF.close();\r
1750                 \r
1751                 if (fasta)      {  outFasta.close();    }\r
1752                 if (qual)       {  outQual.close();             }\r
1753                 if (flow)       {  outFlow.close();             }\r
1754                 \r
1755                 return 0;\r
1756         }\r
1757         catch(exception& e) {\r
1758                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");\r
1759                 exit(1);\r
1760         }\r
1761 }\r
1762 //**********************************************************************************************************************\r
1763 \r
1764 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){\r
1765         try {\r
1766                 int number;\r
1767                 \r
1768                 while (!file.eof())     {\r
1769                         \r
1770                         char c = file.get(); \r
1771                         if (c == ':'){\r
1772                                 file >> number;\r
1773                                 break;\r
1774                         }\r
1775                         \r
1776                 }\r
1777                 m->gobble(file);\r
1778                 return number;\r
1779         }\r
1780         catch(exception& e) {\r
1781                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");\r
1782                 exit(1);\r
1783         }\r
1784         \r
1785 }\r
1786 \r
1787 //**********************************************************************************************************************\r
1788 \r
1789 string SffInfoCommand::parseHeaderLineToString(ifstream& file){\r
1790         try {\r
1791                 string text;\r
1792                 \r
1793                 while (!file.eof())     {\r
1794                         char c = file.get(); \r
1795                         \r
1796                         if (c == ':'){\r
1797                                 //m->gobble(file);\r
1798                                 //text = m->getline(file);      \r
1799                                 file >> text;\r
1800                                 break;\r
1801                         }\r
1802                 }\r
1803                 m->gobble(file);\r
1804                 \r
1805                 return text;\r
1806         }\r
1807         catch(exception& e) {\r
1808                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");\r
1809                 exit(1);\r
1810         }\r
1811 }\r
1812 \r
1813 //**********************************************************************************************************************\r
1814 \r
1815 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){\r
1816         try {\r
1817                 vector<unsigned short> floatVector(length);\r
1818                 \r
1819                 while (!file.eof())     {\r
1820                         char c = file.get(); \r
1821                         if (c == ':'){\r
1822                                 float temp;\r
1823                                 for(int i=0;i<length;i++){\r
1824                                         file >> temp;\r
1825                                         floatVector[i] = temp * 100;\r
1826                                 }\r
1827                                 break;\r
1828                         }\r
1829                 }\r
1830                 m->gobble(file);        \r
1831                 return floatVector;\r
1832         }\r
1833         catch(exception& e) {\r
1834                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");\r
1835                 exit(1);\r
1836         }\r
1837 }\r
1838 \r
1839 //**********************************************************************************************************************\r
1840 \r
1841 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){\r
1842         try {\r
1843                 vector<unsigned int> intVector(length);\r
1844                 \r
1845                 while (!file.eof())     {\r
1846                         char c = file.get(); \r
1847                         if (c == ':'){\r
1848                                 for(int i=0;i<length;i++){\r
1849                                         file >> intVector[i];\r
1850                                 }\r
1851                                 break;\r
1852                         }\r
1853                 }\r
1854                 m->gobble(file);        \r
1855                 return intVector;\r
1856         }\r
1857         catch(exception& e) {\r
1858                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");\r
1859                 exit(1);\r
1860         }\r
1861 }\r
1862 //***************************************************************************************************************\r
1863 \r
1864 bool SffInfoCommand::readOligos(string oligoFile){\r
1865         try {\r
1866         filehandles.clear();\r
1867         numSplitReads.clear();\r
1868         filehandlesHeaders.clear();\r
1869         \r
1870                 bool allBlank = false;\r
1871         oligosObject->read(oligoFile);\r
1872         \r
1873         if (m->control_pressed) { return false; } //error in reading oligos\r
1874         \r
1875         if (oligosObject->hasPairedBarcodes()) {\r
1876             pairedOligos = true;\r
1877             m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true;\r
1878         }else {\r
1879             pairedOligos = false;\r
1880             numFPrimers = oligosObject->getPrimers().size();\r
1881             numBarcodes = oligosObject->getBarcodes().size();\r
1882         }\r
1883         \r
1884         numLinkers = oligosObject->getLinkers().size();\r
1885         numSpacers = oligosObject->getSpacers().size();\r
1886         numRPrimers = oligosObject->getReversePrimers().size();\r
1887         \r
1888         vector<string> groupNames = oligosObject->getGroupNames();\r
1889         if (groupNames.size() == 0) { allBlank = true;  }\r
1890         \r
1891         filehandles.resize(oligosObject->getBarcodeNames().size());\r
1892                 for(int i=0;i<filehandles.size();i++){\r
1893             for(int j=0;j<oligosObject->getPrimerNames().size();j++){  filehandles[i].push_back(""); }\r
1894                 }\r
1895         \r
1896         if(split > 1){\r
1897             set<string> uniqueNames; //used to cleanup outputFileNames\r
1898             map<string, int> barcodes = oligosObject->getBarcodes() ;\r
1899             map<string, int> primers = oligosObject->getPrimers();\r
1900             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
1901                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
1902                     \r
1903                     string primerName = oligosObject->getPrimerName(itPrimer->second);\r
1904                     string barcodeName = oligosObject->getBarcodeName(itBar->second);\r
1905                     \r
1906                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing\r
1907                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing\r
1908                     else {\r
1909                         string comboGroupName = "";\r
1910                         string fastaFileName = "";\r
1911                         string qualFileName = "";\r
1912                         string nameFileName = "";\r
1913                         string countFileName = "";\r
1914                         \r
1915                         if(primerName == ""){\r
1916                             comboGroupName = barcodeName;\r
1917                         }else{\r
1918                             if(barcodeName == ""){\r
1919                                 comboGroupName = primerName;\r
1920                             }\r
1921                             else{\r
1922                                 comboGroupName = barcodeName + "." + primerName;\r
1923                             }\r
1924                         }\r
1925                         \r
1926                         ofstream temp;\r
1927                         map<string, string> variables;\r
1928                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1929                         variables["[group]"] = comboGroupName;\r
1930                         string thisFilename = getOutputFileName("sff",variables);\r
1931                         if (uniqueNames.count(thisFilename) == 0) {\r
1932                             outputNames.push_back(thisFilename);\r
1933                             outputTypes["sff"].push_back(thisFilename);\r
1934                             uniqueNames.insert(thisFilename);\r
1935                         }\r
1936                         \r
1937                         filehandles[itBar->second][itPrimer->second] = thisFilename;\r
1938                         temp.open(thisFilename.c_str(), ios::binary);           temp.close();\r
1939                     }\r
1940                 }\r
1941             }\r
1942         }\r
1943         \r
1944         map<string, string> variables;\r
1945         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1946         variables["[group]"] = "scrap";\r
1947                 noMatchFile = getOutputFileName("sff",variables);\r
1948         m->mothurRemove(noMatchFile);\r
1949         numNoMatch = 0;\r
1950                 \r
1951         filehandlesHeaders.resize(filehandles.size());\r
1952         numSplitReads.resize(filehandles.size());\r
1953         for (int i = 0; i < filehandles.size(); i++) {\r
1954             numSplitReads[i].resize(filehandles[i].size(), 0);\r
1955             for (int j = 0; j < filehandles[i].size(); j++) {\r
1956                 filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
1957             }\r
1958         }\r
1959         \r
1960                 if (allBlank) {\r
1961                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();\r
1962                         split = 1;\r
1963                         return false;\r
1964                 }\r
1965                 \r
1966                 return true;\r
1967                 \r
1968         }\r
1969         catch(exception& e) {\r
1970                 m->errorOut(e, "SffInfoCommand", "readOligos");\r
1971                 exit(1);\r
1972         }\r
1973 }\r
1974 //***************************************************************************************************************\r
1975 \r
1976 bool SffInfoCommand::readGroup(string oligoFile){\r
1977         try {\r
1978         filehandles.clear();\r
1979         numSplitReads.clear();\r
1980         filehandlesHeaders.clear();\r
1981         \r
1982         groupMap = new GroupMap();\r
1983         groupMap->readMap(oligoFile);\r
1984     \r
1985         //like barcodeNameVector - no primer names\r
1986         vector<string> groups = groupMap->getNamesOfGroups();\r
1987                 \r
1988                 filehandles.resize(groups.size());\r
1989         for (int i = 0; i < filehandles.size(); i++) {\r
1990             for (int j = 0; j < 1; j++) {\r
1991                 \r
1992                 map<string, string> variables;\r
1993                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1994                 variables["[group]"] = groups[i];\r
1995                 string thisFilename = getOutputFileName("sff",variables);\r
1996                 outputNames.push_back(thisFilename);\r
1997                 outputTypes["sff"].push_back(thisFilename);\r
1998                \r
1999                 ofstream temp;\r
2000                 m->openOutputFileBinary(thisFilename, temp); temp.close();\r
2001                 filehandles[i].push_back(thisFilename);\r
2002             }\r
2003         }\r
2004         \r
2005         map<string, string> variables;\r
2006         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
2007         variables["[group]"] = "scrap";\r
2008                 noMatchFile = getOutputFileName("sff",variables);\r
2009         m->mothurRemove(noMatchFile);\r
2010         numNoMatch = 0;\r
2011         \r
2012                 \r
2013         filehandlesHeaders.resize(groups.size());\r
2014         numSplitReads.resize(filehandles.size());\r
2015         for (int i = 0; i < filehandles.size(); i++) {\r
2016             numSplitReads[i].resize(filehandles[i].size(), 0);\r
2017             for (int j = 0; j < filehandles[i].size(); j++) {\r
2018                 ofstream temp ;\r
2019                 string thisHeader = filehandles[i][j]+"headers";\r
2020                 m->openOutputFileBinary(thisHeader, temp); temp.close();\r
2021                 filehandlesHeaders[i].push_back(thisHeader);\r
2022             }\r
2023         }\r
2024                 \r
2025                 return true;\r
2026                 \r
2027         }\r
2028         catch(exception& e) {\r
2029                 m->errorOut(e, "SffInfoCommand", "readGroup");\r
2030                 exit(1);\r
2031         }\r
2032 }\r
2033 \r
2034 //********************************************************************/\r
2035 string SffInfoCommand::reverseOligo(string oligo){\r
2036         try {\r
2037         string reverse = "";\r
2038         \r
2039         for(int i=oligo.length()-1;i>=0;i--){\r
2040             \r
2041             if(oligo[i] == 'A')         {       reverse += 'T'; }\r
2042             else if(oligo[i] == 'T'){   reverse += 'A'; }\r
2043             else if(oligo[i] == 'U'){   reverse += 'A'; }\r
2044             \r
2045             else if(oligo[i] == 'G'){   reverse += 'C'; }\r
2046             else if(oligo[i] == 'C'){   reverse += 'G'; }\r
2047             \r
2048             else if(oligo[i] == 'R'){   reverse += 'Y'; }\r
2049             else if(oligo[i] == 'Y'){   reverse += 'R'; }\r
2050             \r
2051             else if(oligo[i] == 'M'){   reverse += 'K'; }\r
2052             else if(oligo[i] == 'K'){   reverse += 'M'; }\r
2053             \r
2054             else if(oligo[i] == 'W'){   reverse += 'W'; }\r
2055             else if(oligo[i] == 'S'){   reverse += 'S'; }\r
2056             \r
2057             else if(oligo[i] == 'B'){   reverse += 'V'; }\r
2058             else if(oligo[i] == 'V'){   reverse += 'B'; }\r
2059             \r
2060             else if(oligo[i] == 'D'){   reverse += 'H'; }\r
2061             else if(oligo[i] == 'H'){   reverse += 'D'; }\r
2062             \r
2063             else                                                {       reverse += 'N'; }\r
2064         }\r
2065         \r
2066         \r
2067         return reverse;\r
2068     }\r
2069         catch(exception& e) {\r
2070                 m->errorOut(e, "SffInfoCommand", "reverseOligo");\r
2071                 exit(1);\r
2072         }\r
2073 }\r
2074 \r
2075 //**********************************************************************************************************************\r
2076 \r
2077 \r
2078                                 \r
2079                                 \r