]> git.donarmstrong.com Git - mothur.git/blob - sffinfocommand.cpp
added binary file operations to mothurout class. added comment to sequence class...
[mothur.git] / sffinfocommand.cpp
1 /*\r
2  *  sffinfocommand.cpp\r
3  *  Mothur\r
4  *\r
5  *  Created by westcott on 7/7/10.\r
6  *  Copyright 2010 Schloss Lab. All rights reserved.\r
7  *\r
8  */\r
9 \r
10 #include "sffinfocommand.h"\r
11 #include "endiannessmacros.h"\r
12 #include "trimoligos.h"\r
13 #include "sequence.hpp"\r
14 #include "qualityscores.h"\r
15 \r
16 //**********************************************************************************************************************\r
17 vector<string> SffInfoCommand::setParameters(){ \r
18         try {           \r
19                 CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
20         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);\r
21                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
22                 CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
23                 CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
24                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);\r
25                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);\r
26                 CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);\r
27         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);\r
28                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);\r
29         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);\r
30                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);\r
31         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);\r
32                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);\r
33                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);\r
34                 \r
35                 vector<string> myArray;\r
36                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }\r
37                 return myArray;\r
38         }\r
39         catch(exception& e) {\r
40                 m->errorOut(e, "SffInfoCommand", "setParameters");\r
41                 exit(1);\r
42         }\r
43 }\r
44 //**********************************************************************************************************************\r
45 string SffInfoCommand::getHelpString(){ \r
46         try {\r
47                 string helpString = "";\r
48                 helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
49                 helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
50                 helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
51                 helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
52                 helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
53         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
54         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
55                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
56                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
57         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
58                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
59                 helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";\r
60                 helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";\r
61                 helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
62                 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";\r
63                 helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";\r
64                 helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";\r
65                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";\r
66                 return helpString;\r
67         }\r
68         catch(exception& e) {\r
69                 m->errorOut(e, "SffInfoCommand", "getHelpString");\r
70                 exit(1);\r
71         }\r
72 }\r
73 \r
74 //**********************************************************************************************************************\r
75 string SffInfoCommand::getOutputPattern(string type) {\r
76     try {\r
77         string pattern = "";\r
78         \r
79         if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }\r
80         else if (type == "flow")    {   pattern =  "[filename],flow";   }\r
81         else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }\r
82         else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }\r
83         else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }\r
84         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }\r
85         \r
86         return pattern;\r
87     }\r
88     catch(exception& e) {\r
89         m->errorOut(e, "SffInfoCommand", "getOutputPattern");\r
90         exit(1);\r
91     }\r
92 }\r
93 //**********************************************************************************************************************\r
94 SffInfoCommand::SffInfoCommand(){       \r
95         try {\r
96                 abort = true; calledHelp = true; \r
97                 setParameters();\r
98                 vector<string> tempOutNames;\r
99                 outputTypes["fasta"] = tempOutNames;\r
100                 outputTypes["flow"] = tempOutNames;\r
101                 outputTypes["sfftxt"] = tempOutNames;\r
102                 outputTypes["qfile"] = tempOutNames;\r
103         outputTypes["sff"] = tempOutNames;\r
104         }\r
105         catch(exception& e) {\r
106                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
107                 exit(1);\r
108         }\r
109 }\r
110 //**********************************************************************************************************************\r
111 \r
112 SffInfoCommand::SffInfoCommand(string option)  {\r
113         try {\r
114                 abort = false; calledHelp = false;   \r
115                 hasAccnos = false; hasOligos = false;\r
116         split = 1;\r
117                 \r
118                 //allow user to run help\r
119                 if(option == "help") { help(); abort = true; calledHelp = true; }\r
120                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}\r
121                 \r
122                 else {\r
123                         //valid paramters for this command\r
124                         vector<string> myArray = setParameters();\r
125                         \r
126                         OptionParser parser(option);\r
127                         map<string, string> parameters = parser.getParameters();\r
128                         \r
129                         ValidParameters validParameter;\r
130                         //check to make sure all parameters are valid for command\r
131                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { \r
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }\r
133                         }\r
134                         \r
135                         //initialize outputTypes\r
136                         vector<string> tempOutNames;\r
137                         outputTypes["fasta"] = tempOutNames;\r
138                         outputTypes["flow"] = tempOutNames;\r
139                         outputTypes["sfftxt"] = tempOutNames;\r
140                         outputTypes["qfile"] = tempOutNames;\r
141             outputTypes["sff"] = tempOutNames;\r
142                         \r
143                         //if the user changes the output directory command factory will send this info to us in the output parameter \r
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }\r
145                         \r
146                         //if the user changes the input directory command factory will send this info to us in the output parameter \r
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }\r
148 \r
149                         sffFilename = validParameter.validFile(parameters, "sff", false);\r
150                         if (sffFilename == "not found") { sffFilename = "";  }\r
151                         else { \r
152                                 m->splitAtDash(sffFilename, filenames);\r
153                                 \r
154                                 //go through files and make sure they are good, if not, then disregard them\r
155                                 for (int i = 0; i < filenames.size(); i++) {\r
156                                         bool ignore = false;\r
157                                         if (filenames[i] == "current") { \r
158                                                 filenames[i] = m->getSFFFile(); \r
159                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }\r
160                                                 else {  \r
161                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
162                                                         //erase from file list\r
163                                                         filenames.erase(filenames.begin()+i);\r
164                                                         i--;\r
165                                                 }\r
166                                         }\r
167                                         \r
168                                         if (!ignore) {\r
169                                                 if (inputDir != "") {\r
170                                                         string path = m->hasPath(filenames[i]);\r
171                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
172                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }\r
173                                                 }\r
174                 \r
175                                                 ifstream in;\r
176                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");\r
177                                         \r
178                                                 //if you can't open it, try default location\r
179                                                 if (ableToOpen == 1) {\r
180                                                         if (m->getDefaultPath() != "") { //default path is set\r
181                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);\r
182                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
183                                                                 ifstream in2;\r
184                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
185                                                                 in2.close();\r
186                                                                 filenames[i] = tryPath;\r
187                                                         }\r
188                                                 }\r
189                                                 \r
190                                                 //if you can't open it, try default location\r
191                                                 if (ableToOpen == 1) {\r
192                                                         if (m->getOutputDir() != "") { //default path is set\r
193                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);\r
194                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
195                                                                 ifstream in2;\r
196                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
197                                                                 in2.close();\r
198                                                                 filenames[i] = tryPath;\r
199                                                         }\r
200                                                 }\r
201                                                 \r
202                                                 in.close();\r
203                                                 \r
204                                                 if (ableToOpen == 1) { \r
205                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
206                                                         //erase from file list\r
207                                                         filenames.erase(filenames.begin()+i);\r
208                                                         i--;\r
209                                                 }else { m->setSFFFile(filenames[i]); }\r
210                                         }\r
211                                 }\r
212                                 \r
213                                 //make sure there is at least one valid file left\r
214                                 if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
215                         }\r
216                         \r
217                         accnosName = validParameter.validFile(parameters, "accnos", false);\r
218                         if (accnosName == "not found") { accnosName = "";  }\r
219                         else { \r
220                                 hasAccnos = true;\r
221                                 m->splitAtDash(accnosName, accnosFileNames);\r
222                                 \r
223                                 //go through files and make sure they are good, if not, then disregard them\r
224                                 for (int i = 0; i < accnosFileNames.size(); i++) {\r
225                                         bool ignore = false;\r
226                                         if (accnosFileNames[i] == "current") { \r
227                                                 accnosFileNames[i] = m->getAccnosFile(); \r
228                                                 if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
229                                                 else {  \r
230                                                         m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
231                                                         //erase from file list\r
232                                                         accnosFileNames.erase(accnosFileNames.begin()+i);\r
233                                                         i--;\r
234                                                 }\r
235                                         }\r
236                                         \r
237                                         if (!ignore) {\r
238                                         \r
239                                                 if (inputDir != "") {\r
240                                                         string path = m->hasPath(accnosFileNames[i]);\r
241                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
242                                                         if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }\r
243                                                 }\r
244                 \r
245                                                 ifstream in;\r
246                                                 int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");\r
247                                         \r
248                                                 //if you can't open it, try default location\r
249                                                 if (ableToOpen == 1) {\r
250                                                         if (m->getDefaultPath() != "") { //default path is set\r
251                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);\r
252                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
253                                                                 ifstream in2;\r
254                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
255                                                                 in2.close();\r
256                                                                 accnosFileNames[i] = tryPath;\r
257                                                         }\r
258                                                 }\r
259                                                 //if you can't open it, try default location\r
260                                                 if (ableToOpen == 1) {\r
261                                                         if (m->getOutputDir() != "") { //default path is set\r
262                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);\r
263                                                                 m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
264                                                                 ifstream in2;\r
265                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
266                                                                 in2.close();\r
267                                                                 accnosFileNames[i] = tryPath;\r
268                                                         }\r
269                                                 }\r
270                                                 in.close();\r
271                                                 \r
272                                                 if (ableToOpen == 1) { \r
273                                                         m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
274                                                         //erase from file list\r
275                                                         accnosFileNames.erase(accnosFileNames.begin()+i);\r
276                                                         i--;\r
277                                                 }\r
278                                         }\r
279                                 }\r
280                                 \r
281                                 //make sure there is at least one valid file left\r
282                                 if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
283                         }\r
284             \r
285             oligosfile = validParameter.validFile(parameters, "oligos", false);\r
286                         if (oligosfile == "not found") { oligosfile = "";  }\r
287                         else { \r
288                                 hasOligos = true;\r
289                                 m->splitAtDash(oligosfile, oligosFileNames);\r
290                                 \r
291                                 //go through files and make sure they are good, if not, then disregard them\r
292                                 for (int i = 0; i < oligosFileNames.size(); i++) {\r
293                                         bool ignore = false;\r
294                                         if (oligosFileNames[i] == "current") { \r
295                                                 oligosFileNames[i] = m->getOligosFile(); \r
296                                                 if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
297                                                 else {  \r
298                                                         m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
299                                                         //erase from file list\r
300                                                         oligosFileNames.erase(oligosFileNames.begin()+i);\r
301                                                         i--;\r
302                                                 }\r
303                                         }\r
304                                         \r
305                                         if (!ignore) {\r
306                         \r
307                                                 if (inputDir != "") {\r
308                                                         string path = m->hasPath(oligosFileNames[i]);\r
309                                                         //if the user has not given a path then, add inputdir. else leave path alone.\r
310                                                         if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }\r
311                                                 }\r
312                         \r
313                                                 ifstream in;\r
314                                                 int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");\r
315                         \r
316                                                 //if you can't open it, try default location\r
317                                                 if (ableToOpen == 1) {\r
318                                                         if (m->getDefaultPath() != "") { //default path is set\r
319                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);\r
320                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
321                                                                 ifstream in2;\r
322                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
323                                                                 in2.close();\r
324                                                                 oligosFileNames[i] = tryPath;\r
325                                                         }\r
326                                                 }\r
327                                                 //if you can't open it, try default location\r
328                                                 if (ableToOpen == 1) {\r
329                                                         if (m->getOutputDir() != "") { //default path is set\r
330                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);\r
331                                                                 m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
332                                                                 ifstream in2;\r
333                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
334                                                                 in2.close();\r
335                                                                 oligosFileNames[i] = tryPath;\r
336                                                         }\r
337                                                 }\r
338                                                 in.close();\r
339                                                 \r
340                                                 if (ableToOpen == 1) { \r
341                                                         m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
342                                                         //erase from file list\r
343                                                         oligosFileNames.erase(oligosFileNames.begin()+i);\r
344                                                         i--;\r
345                                                 }\r
346                                         }\r
347                                 }\r
348                                 \r
349                                 //make sure there is at least one valid file left\r
350                                 if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
351                         }\r
352 \r
353                         if (hasOligos) {\r
354                 split = 2;\r
355                                 if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
356                         }\r
357             \r
358                         if (hasAccnos) {\r
359                                 if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
360                         }\r
361                         \r
362                         string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }\r
363                         qual = m->isTrue(temp); \r
364                         \r
365                         temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }\r
366                         fasta = m->isTrue(temp); \r
367                         \r
368                         temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }\r
369                         flow = m->isTrue(temp); \r
370                         \r
371                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }\r
372                         trim = m->isTrue(temp); \r
373             \r
374             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }\r
375                         m->mothurConvert(temp, bdiffs);\r
376                         \r
377                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }\r
378                         m->mothurConvert(temp, pdiffs);\r
379             \r
380             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }\r
381                         m->mothurConvert(temp, ldiffs);\r
382             \r
383             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }\r
384                         m->mothurConvert(temp, sdiffs);\r
385                         \r
386                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }\r
387                         m->mothurConvert(temp, tdiffs);\r
388                         \r
389                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }\r
390             \r
391                         temp = validParameter.validFile(parameters, "sfftxt", false);                           \r
392                         if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }\r
393                         else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }\r
394                         else {\r
395                                 //you are a filename\r
396                                 if (inputDir != "") {\r
397                                         map<string,string>::iterator it = parameters.find("sfftxt");\r
398                                         //user has given a template file\r
399                                         if(it != parameters.end()){ \r
400                                                 string path = m->hasPath(it->second);\r
401                                                 //if the user has not given a path then, add inputdir. else leave path alone.\r
402                                                 if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }\r
403                                         }\r
404                                 }\r
405                                 \r
406                                 sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);\r
407                                 if (sfftxtFilename == "not found") { sfftxtFilename = "";  }\r
408                                 else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }\r
409                         }\r
410                         \r
411                         if ((sfftxtFilename == "") && (filenames.size() == 0)) {  \r
412                                 //if there is a current sff file, use it\r
413                                 string filename = m->getSFFFile(); \r
414                                 if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }\r
415                                 else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }\r
416                         }\r
417             \r
418             \r
419                 }\r
420         }\r
421         catch(exception& e) {\r
422                 m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
423                 exit(1);\r
424         }\r
425 }\r
426 //**********************************************************************************************************************\r
427 int SffInfoCommand::execute(){\r
428         try {\r
429                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }\r
430                 \r
431                 for (int s = 0; s < filenames.size(); s++) {\r
432                         \r
433                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
434                         \r
435                         int start = time(NULL);\r
436                         \r
437             filenames[s] = m->getFullPathName(filenames[s]);\r
438                         m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();\r
439                         \r
440                         string accnos = "";\r
441                         if (hasAccnos) { accnos = accnosFileNames[s]; }\r
442             \r
443             string oligos = "";\r
444             if (hasOligos) { oligos = oligosFileNames[s]; }\r
445                         \r
446                         int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
447 \r
448                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
449                 }\r
450                 \r
451                 if (sfftxtFilename != "") {  parseSffTxt(); }\r
452                 \r
453                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
454                 \r
455                 //set fasta file as new current fastafile\r
456                 string current = "";\r
457                 itTypes = outputTypes.find("fasta");\r
458                 if (itTypes != outputTypes.end()) {\r
459                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }\r
460                 }\r
461                 \r
462                 itTypes = outputTypes.find("qfile");\r
463                 if (itTypes != outputTypes.end()) {\r
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }\r
465                 }\r
466                 \r
467                 itTypes = outputTypes.find("flow");\r
468                 if (itTypes != outputTypes.end()) {\r
469                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }\r
470                 }\r
471                 \r
472                 //report output filenames\r
473                 m->mothurOutEndLine();\r
474                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();\r
475                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }\r
476                 m->mothurOutEndLine();\r
477 \r
478                 return 0;\r
479         }\r
480         catch(exception& e) {\r
481                 m->errorOut(e, "SffInfoCommand", "execute");\r
482                 exit(1);\r
483         }\r
484 }\r
485 //**********************************************************************************************************************\r
486 int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
487         try {\r
488                 currentFileName = input;\r
489                 if (outputDir == "") {  outputDir += m->hasPath(input); }\r
490                 \r
491                 if (accnos != "")       {  readAccnosFile(accnos);  }\r
492                 else                            {       seqNames.clear();               }\r
493          \r
494         if (oligos != "")   {   readOligos(oligos);  split = 2;   }\r
495 \r
496                 ofstream outSfftxt, outFasta, outQual, outFlow;\r
497                 string outFastaFileName, outQualFileName;\r
498         string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
499         if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }\r
500         \r
501         map<string, string> variables; \r
502                 variables["[filename]"] = rootName;\r
503                 string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
504                 string outFlowFileName = getOutputFileName("flow",variables);\r
505                 if (!trim) { variables["[tag]"] = "raw"; }\r
506                 outFastaFileName = getOutputFileName("fasta",variables);\r
507         outQualFileName = getOutputFileName("qfile",variables);\r
508         \r
509                 if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }\r
510                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
511                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
512                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
513                 \r
514                 ifstream in;\r
515                 m->openInputFileBinary(input, in);\r
516                 \r
517                 CommonHeader header;\r
518                 readCommonHeader(in, header);\r
519         \r
520                 int count = 0;\r
521                 \r
522                 //check magic number and version\r
523                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
524                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
525         \r
526                 //print common header\r
527                 if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
528                 if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }\r
529                         \r
530                 //read through the sff file\r
531                 while (!in.eof()) {\r
532                         \r
533                         bool print = true;\r
534                                                 \r
535                         //read data\r
536                         seqRead read;  Header readheader;\r
537             readSeqData(in, read, header.numFlowsPerRead, readheader);\r
538             \r
539             bool okay = sanityCheck(readheader, read);\r
540             if (!okay) { break; }\r
541             \r
542                         //if you have provided an accosfile and this seq is not in it, then dont print\r
543                         if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }\r
544                         \r
545                         //print \r
546                         if (print) {\r
547                                 if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }\r
548                                 if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }\r
549                                 if (qual)       {       printQualSeqData(outQual, read, readheader);    }\r
550                                 if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }\r
551                         }\r
552                         \r
553                         count++;\r
554         \r
555                         //report progress\r
556                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }\r
557                 \r
558                         if (m->control_pressed) { count = 0; break;   }\r
559                         \r
560                         if (count >= header.numReads) { break; }\r
561                 }\r
562                 \r
563                 //report progress\r
564                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }\r
565                 \r
566                 in.close();\r
567                 \r
568                 if (sfftxt) {  outSfftxt.close();       }\r
569                 if (fasta)      {  outFasta.close();    }\r
570                 if (qual)       {  outQual.close();             }\r
571                 if (flow)       {  outFlow.close();             }\r
572                 \r
573         if (split > 1) {\r
574             //create new common headers for each file with the correct number of reads\r
575             adjustCommonHeader(header);\r
576             \r
577             //close files and delete ofstreams\r
578             for(int i=0;i<filehandles.size();i++){\r
579                                 for(int j=0;j<filehandles[0].size();j++){\r
580                     (filehandles[i][j].begin()->second)->close(); delete (filehandles[i][j].begin()->second);\r
581                     (filehandlesHeaders[i][j].begin()->second)->close(); delete (filehandlesHeaders[i][j].begin()->second);\r
582                 }\r
583             }\r
584             \r
585             //cout << "here" << endl;\r
586                         map<string, string>::iterator it;\r
587                         set<string> namesToRemove;\r
588                         for(int i=0;i<filehandles.size();i++){\r
589                                 for(int j=0;j<filehandles[0].size();j++){\r
590                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << endl;\r
591                                         if (filehandles[i][j].begin()->first != "") {\r
592                                                 if (namesToRemove.count(filehandles[i][j].begin()->first) == 0) {\r
593                                                         if(m->isBlank(filehandles[i][j].begin()->first)){\r
594                                 //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
595                                                                 m->mothurRemove(filehandles[i][j].begin()->first);\r
596                                 m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
597                                                                 namesToRemove.insert(filehandles[i][j].begin()->first);\r
598                             }\r
599                                                 }\r
600                                         }\r
601                                 }\r
602                         }\r
603             //cout << "here2" << endl;\r
604             //append new header to reads\r
605             for (int i = 0; i < filehandles.size(); i++) {\r
606                 for (int j = 0; j < filehandles[i].size(); j++) {\r
607                     m->appendBinaryFiles(filehandles[i][j].begin()->first, filehandlesHeaders[i][j].begin()->first);\r
608                     m->renameFile(filehandlesHeaders[i][j].begin()->first, filehandles[i][j].begin()->first);\r
609                     m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
610                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
611                     if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j].begin()->first); }\r
612                 }\r
613             }\r
614                         //cout << "here3" << endl;\r
615                         //remove names for outputFileNames, just cleans up the output\r
616                         for(int i = 0; i < outputNames.size(); i++) { \r
617                 if (namesToRemove.count(outputNames[i]) != 0) {\r
618                     //cout << "erasing " << i << '\t' << outputNames[i] << endl;\r
619                     outputNames.erase(outputNames.begin()+i);\r
620                     i--;\r
621                 } \r
622             }\r
623             //cout << "here4" << endl;\r
624             if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }\r
625             else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
626         }\r
627         \r
628                 return count;\r
629         }\r
630         catch(exception& e) {\r
631                 m->errorOut(e, "SffInfoCommand", "extractSffInfo");\r
632                 exit(1);\r
633         }\r
634 }\r
635 //**********************************************************************************************************************\r
636 int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){\r
637         try {\r
638         \r
639                 if (!in.eof()) {\r
640 \r
641                         //read magic number\r
642                         char buffer[4];\r
643                         in.read(buffer, 4);\r
644                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));\r
645             \r
646                         //read version\r
647                         char buffer9[4];\r
648                         in.read(buffer9, 4);\r
649                         header.version = "";\r
650                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }\r
651     \r
652                         //read offset\r
653                         char buffer2 [8];\r
654                         in.read(buffer2, 8);\r
655                         header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));\r
656                         \r
657                         //read index length\r
658                         char buffer3 [4];\r
659                         in.read(buffer3, 4);\r
660                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));\r
661             \r
662                         //read num reads\r
663                         char buffer4 [4];\r
664                         in.read(buffer4, 4);\r
665                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));\r
666             \r
667             if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }\r
668                                 \r
669                         //read header length\r
670                         char buffer5 [2];\r
671                         in.read(buffer5, 2);\r
672                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));\r
673                                         \r
674                         //read key length\r
675                         char buffer6 [2];\r
676                         in.read(buffer6, 2);\r
677                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));\r
678                         \r
679                         //read number of flow reads\r
680                         char buffer7 [2];\r
681                         in.read(buffer7, 2);\r
682                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));\r
683                                 \r
684                         //read format code\r
685                         char buffer8 [1];\r
686                         in.read(buffer8, 1);\r
687                         header.flogramFormatCode = (int)(buffer8[0]);\r
688                         \r
689                         //read flow chars\r
690                         char* tempBuffer = new char[header.numFlowsPerRead];\r
691                         in.read(&(*tempBuffer), header.numFlowsPerRead); \r
692                         header.flowChars = tempBuffer;\r
693                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }\r
694                         delete[] tempBuffer;\r
695                         \r
696                         //read key\r
697                         char* tempBuffer2 = new char[header.keyLength];\r
698                         in.read(&(*tempBuffer2), header.keyLength);\r
699                         header.keySequence = tempBuffer2;\r
700                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }\r
701                         delete[] tempBuffer2;\r
702                         \r
703                         /* Pad to 8 chars */\r
704                         unsigned long long spotInFile = in.tellg();\r
705                         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
706                         in.seekg(spot);\r
707             \r
708         }else{\r
709                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();\r
710                 }\r
711         \r
712                 return 0;\r
713         \r
714         }\r
715         catch(exception& e) {\r
716                 m->errorOut(e, "SffInfoCommand", "readCommonHeader");\r
717                 exit(1);\r
718         }\r
719 }\r
720 //**********************************************************************************************************************\r
721 int SffInfoCommand::adjustCommonHeader(CommonHeader header){\r
722         try {\r
723         string endian = m->findEdianness();\r
724         char* mybuffer = new char[4];\r
725         ifstream in;\r
726         m->openInputFileBinary(currentFileName, in);\r
727         \r
728         ofstream outNoMatchHeader;\r
729         string tempNoHeader = "tempNoMatchHeader";\r
730         m->openOutputFileBinary(tempNoHeader, outNoMatchHeader);\r
731         \r
732         //magic number\r
733         in.read(mybuffer,4);\r
734         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
735             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
736                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
737             }\r
738         }\r
739         outNoMatchHeader.write(mybuffer, in.gcount());\r
740         delete[] mybuffer;\r
741         \r
742         //version\r
743         mybuffer = new char[4];\r
744         in.read(mybuffer,4);\r
745         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
746             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
747                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
748             }\r
749         }\r
750         outNoMatchHeader.write(mybuffer, in.gcount());\r
751         delete[] mybuffer;\r
752         \r
753         //offset\r
754         mybuffer = new char[8];\r
755         in.read(mybuffer,8);\r
756         unsigned long long offset = 0;\r
757         char* thisbuffer = new char[8];\r
758         thisbuffer[0] = (offset >> 56) & 0xFF;\r
759         thisbuffer[1] = (offset >> 48) & 0xFF;\r
760         thisbuffer[2] = (offset >> 40) & 0xFF;\r
761         thisbuffer[3] = (offset >> 32) & 0xFF;\r
762         thisbuffer[4] = (offset >> 24) & 0xFF;\r
763         thisbuffer[5] = (offset >> 16) & 0xFF;\r
764         thisbuffer[6] = (offset >> 8) & 0xFF;\r
765         thisbuffer[7] = offset & 0xFF;\r
766         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
767             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
768                 (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 8);\r
769             }\r
770         }\r
771         outNoMatchHeader.write(thisbuffer, 8);\r
772         delete[] thisbuffer;\r
773         delete[] mybuffer;\r
774             \r
775                         \r
776         //read index length\r
777                 mybuffer = new char[4];\r
778         in.read(mybuffer,4);\r
779         offset = 0;\r
780         char* thisbuffer2 = new char[4];\r
781         thisbuffer2[0] = (offset >> 24) & 0xFF;\r
782         thisbuffer2[1] = (offset >> 16) & 0xFF;\r
783         thisbuffer2[2] = (offset >> 8) & 0xFF;\r
784         thisbuffer2[3] = offset & 0xFF;\r
785         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
786             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
787                 (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer2, 4);\r
788             }\r
789         }\r
790         outNoMatchHeader.write(thisbuffer2, 4);\r
791         delete[] thisbuffer2;\r
792         delete[] mybuffer;\r
793                 \r
794         //change num reads\r
795         mybuffer = new char[4];\r
796         in.read(mybuffer,4);\r
797         delete[] mybuffer;\r
798         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
799             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
800                 char* thisbuffer = new char[4];\r
801                 if (endian == "BIG_ENDIAN") {\r
802                     thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;\r
803                     thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;\r
804                     thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;\r
805                     thisbuffer[3] = numSplitReads[i][j] & 0xFF;\r
806                 }else {\r
807                     thisbuffer[0] = numSplitReads[i][j] & 0xFF;\r
808                     thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF;\r
809                     thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
810                     thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
811                  }\r
812                 (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 4);\r
813                 delete[] thisbuffer;\r
814             }\r
815         }\r
816         char* thisbuffer3 = new char[4];\r
817         if (endian == "BIG_ENDIAN") {\r
818             thisbuffer3[0] = (numNoMatch >> 24) & 0xFF;\r
819             thisbuffer3[1] = (numNoMatch >> 16) & 0xFF;\r
820             thisbuffer3[2] = (numNoMatch >> 8) & 0xFF;\r
821             thisbuffer3[3] = numNoMatch & 0xFF;\r
822         }else {\r
823             thisbuffer3[0] = numNoMatch & 0xFF;\r
824             thisbuffer3[1] = (numNoMatch >> 8) & 0xFF;\r
825             thisbuffer3[2] = (numNoMatch >> 16) & 0xFF;\r
826             thisbuffer3[3] = (numNoMatch >> 24) & 0xFF;\r
827         }\r
828         outNoMatchHeader.write(thisbuffer3, 4);\r
829         delete[] thisbuffer3;\r
830         \r
831         \r
832         //read header length\r
833         mybuffer = new char[2];\r
834         in.read(mybuffer,2);\r
835         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
836             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
837                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
838             }\r
839         }\r
840         outNoMatchHeader.write(mybuffer, in.gcount());\r
841         delete[] mybuffer;\r
842             \r
843         //read key length\r
844         mybuffer = new char[2];\r
845         in.read(mybuffer,2);\r
846         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
847             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
848                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
849             }\r
850         }\r
851         outNoMatchHeader.write(mybuffer, in.gcount());\r
852         delete[] mybuffer;\r
853                         \r
854         //read number of flow reads\r
855         mybuffer = new char[2];\r
856         in.read(mybuffer,2);\r
857         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
858             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
859                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
860             }\r
861         }\r
862         outNoMatchHeader.write(mybuffer, in.gcount());\r
863         delete[] mybuffer;\r
864             \r
865         //read format code\r
866         mybuffer = new char[1];\r
867         in.read(mybuffer,1);\r
868         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
869             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
870                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
871             }\r
872         }\r
873         outNoMatchHeader.write(mybuffer, in.gcount());\r
874         delete[] mybuffer;\r
875                         \r
876         //read flow chars\r
877         mybuffer = new char[header.numFlowsPerRead];\r
878         in.read(mybuffer,header.numFlowsPerRead);\r
879         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
880             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
881                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
882             }\r
883         }\r
884         outNoMatchHeader.write(mybuffer, in.gcount());\r
885         delete[] mybuffer;\r
886                         \r
887         //read key\r
888         mybuffer = new char[header.keyLength];\r
889         in.read(mybuffer,header.keyLength);\r
890         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
891             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
892                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
893             }\r
894         }\r
895         outNoMatchHeader.write(mybuffer, in.gcount());\r
896         delete[] mybuffer;\r
897         \r
898                         \r
899         /* Pad to 8 chars */\r
900         unsigned long long spotInFile = in.tellg();\r
901         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
902         in.seekg(spot);\r
903         \r
904         mybuffer = new char[spot-spotInFile];\r
905         for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
906             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
907                 (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, spot-spotInFile);\r
908             }\r
909         }\r
910         outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
911         outNoMatchHeader.close();\r
912         delete[] mybuffer;\r
913         in.close();\r
914         \r
915         m->appendBinaryFiles(noMatchFile, tempNoHeader);\r
916         m->renameFile(tempNoHeader, noMatchFile);\r
917         m->mothurRemove(tempNoHeader);\r
918         \r
919                 return 0;\r
920         \r
921         }\r
922         catch(exception& e) {\r
923                 m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");\r
924                 exit(1);\r
925         }\r
926 }\r
927 //**********************************************************************************************************************\r
928 bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
929         try {\r
930         unsigned long long startSpotInFile = in.tellg();\r
931                 if (!in.eof()) {\r
932             \r
933             /*****************************************/\r
934             //read header\r
935             \r
936             //read header length\r
937                         char buffer [2];\r
938                         in.read(buffer, 2); \r
939                         header.headerLength = be_int2(*(unsigned short *)(&buffer));\r
940             \r
941                         //read name length\r
942                         char buffer2 [2];\r
943                         in.read(buffer2, 2);\r
944                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));\r
945             \r
946                         //read num bases\r
947                         char buffer3 [4];\r
948                         in.read(buffer3, 4);\r
949                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));\r
950             \r
951                         \r
952                         //read clip qual left\r
953                         char buffer4 [2];\r
954                         in.read(buffer4, 2);\r
955                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));\r
956                         header.clipQualLeft = 5;\r
957             \r
958                         \r
959                         //read clip qual right\r
960                         char buffer5 [2];\r
961                         in.read(buffer5, 2);\r
962                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));\r
963            \r
964             \r
965                         //read clipAdapterLeft\r
966                         char buffer6 [2];\r
967                         in.read(buffer6, 2);\r
968                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));\r
969             \r
970             \r
971                         //read clipAdapterRight\r
972                         char buffer7 [2];\r
973                         in.read(buffer7, 2);\r
974                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));\r
975             \r
976             \r
977                         //read name\r
978                         char* tempBuffer = new char[header.nameLength];\r
979                         in.read(&(*tempBuffer), header.nameLength);\r
980                         header.name = tempBuffer;\r
981                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }\r
982             \r
983                         delete[] tempBuffer;\r
984                         \r
985                         //extract info from name\r
986                         decodeName(header.timestamp, header.region, header.xy, header.name);\r
987                         \r
988                         /* Pad to 8 chars */\r
989                         unsigned long long spotInFile = in.tellg();\r
990                         unsigned long long spot = (spotInFile + 7)& ~7;\r
991                         in.seekg(spot);\r
992 \r
993             /*****************************************/\r
994             //sequence read \r
995             \r
996                         //read flowgram\r
997                         read.flowgram.resize(numFlowReads);\r
998                         for (int i = 0; i < numFlowReads; i++) {  \r
999                                 char buffer [2];\r
1000                                 in.read(buffer, 2);\r
1001                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));\r
1002                         }\r
1003             \r
1004                         //read flowIndex\r
1005                         read.flowIndex.resize(header.numBases);\r
1006                         for (int i = 0; i < header.numBases; i++) {  \r
1007                                 char temp[1];\r
1008                                 in.read(temp, 1);\r
1009                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));\r
1010                         }\r
1011         \r
1012                         //read bases\r
1013                         char* tempBuffer6 = new char[header.numBases];\r
1014                         in.read(&(*tempBuffer6), header.numBases);\r
1015                         read.bases = tempBuffer6;\r
1016                         if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }\r
1017                         delete[] tempBuffer6;\r
1018 \r
1019                         //read qual scores\r
1020                         read.qualScores.resize(header.numBases);\r
1021                         for (int i = 0; i < header.numBases; i++) {  \r
1022                                 char temp[1];\r
1023                                 in.read(temp, 1);\r
1024                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));\r
1025                         }\r
1026         \r
1027                         /* Pad to 8 chars */\r
1028                         spotInFile = in.tellg();\r
1029                         spot = (spotInFile + 7)& ~7;\r
1030                         in.seekg(spot);\r
1031             \r
1032             if (split > 1) { \r
1033                \r
1034                 int barcodeIndex, primerIndex;\r
1035                 int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);\r
1036                                 \r
1037                 char * mybuffer;\r
1038                 mybuffer = new char [spot-startSpotInFile];\r
1039                 \r
1040                 ifstream in2;\r
1041                 m->openInputFileBinary(currentFileName, in2);\r
1042                 in2.seekg(startSpotInFile);\r
1043                 in2.read(mybuffer,spot-startSpotInFile);\r
1044                 \r
1045                 \r
1046                 if(trashCodeLength == 0){\r
1047                     (*(filehandles[barcodeIndex][primerIndex].begin()->second)).write(mybuffer, in2.gcount());\r
1048                     numSplitReads[barcodeIndex][primerIndex]++;\r
1049                                 }\r
1050                                 else{\r
1051                                         ofstream out;\r
1052                     m->openOutputFileBinaryAppend(noMatchFile, out);\r
1053                     out.write(mybuffer, in2.gcount());\r
1054                     out.close();\r
1055                     numNoMatch++;\r
1056                                 }\r
1057                                 delete[] mybuffer;\r
1058                 in2.close();\r
1059         }    \r
1060             \r
1061                 }else{\r
1062                         m->mothurOut("Error reading."); m->mothurOutEndLine();\r
1063                 }\r
1064         \r
1065         if (in.eof()) {  return true; }\r
1066         \r
1067                 return false;\r
1068         }\r
1069         catch(exception& e) {\r
1070                 m->errorOut(e, "SffInfoCommand", "readSeqData");\r
1071                 exit(1);\r
1072         }\r
1073 }\r
1074 //**********************************************************************************************************************\r
1075 int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
1076         try {\r
1077         //find group read belongs to\r
1078         TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
1079         \r
1080         int success = 1;\r
1081         string trashCode = "";\r
1082         int currentSeqsDiffs = 0;\r
1083         \r
1084         string seq = read.bases;\r
1085         \r
1086         if (trim) {\r
1087             if(header.clipQualRight < header.clipQualLeft){\r
1088                 if (header.clipQualRight == 0) { //don't trim right\r
1089                     seq = seq.substr(header.clipQualLeft-1);\r
1090                 }else {\r
1091                     seq = "NNNN";\r
1092                 }\r
1093             }\r
1094             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1095                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
1096             }\r
1097             else {\r
1098                 seq = seq.substr(header.clipQualLeft-1);\r
1099             }\r
1100         }else{\r
1101             //if you wanted the sfftxt then you already converted the bases to the right case\r
1102             if (!sfftxt) {\r
1103                 int endValue = header.clipQualRight;\r
1104                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1105                 if(endValue == 0){      endValue = seq.length();        }\r
1106                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
1107                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
1108                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
1109             }\r
1110         }\r
1111         \r
1112         Sequence currSeq(header.name, seq);\r
1113         QualityScores currQual;\r
1114         \r
1115         if(numLinkers != 0){\r
1116             success = trimOligos.stripLinker(currSeq, currQual);\r
1117             if(success > ldiffs)                {       trashCode += 'k';       }\r
1118             else{ currentSeqsDiffs += success;  }\r
1119             \r
1120         }\r
1121         \r
1122         if(barcodes.size() != 0){\r
1123             success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
1124             if(success > bdiffs)                {       trashCode += 'b';       }\r
1125             else{ currentSeqsDiffs += success;  }\r
1126         }\r
1127         \r
1128         if(numSpacers != 0){\r
1129             success = trimOligos.stripSpacer(currSeq, currQual);\r
1130             if(success > sdiffs)                {       trashCode += 's';       }\r
1131             else{ currentSeqsDiffs += success;  }\r
1132             \r
1133         }\r
1134         \r
1135         if(numFPrimers != 0){\r
1136             success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
1137             if(success > pdiffs)                {       trashCode += 'f';       }\r
1138             else{ currentSeqsDiffs += success;  }\r
1139         }\r
1140         \r
1141         if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }\r
1142         \r
1143         if(revPrimer.size() != 0){\r
1144             success = trimOligos.stripReverse(currSeq, currQual);\r
1145             if(!success)                                {       trashCode += 'r';       }\r
1146         }\r
1147 \r
1148         \r
1149         return trashCode.length();\r
1150     }\r
1151         catch(exception& e) {\r
1152                 m->errorOut(e, "SffInfoCommand", "findGroup");\r
1153                 exit(1);\r
1154         }\r
1155 }     \r
1156 //**********************************************************************************************************************\r
1157 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
1158         try {\r
1159                 \r
1160                 if (name.length() >= 6) {\r
1161                         string time = name.substr(0, 6);\r
1162                         unsigned int timeNum = m->fromBase36(time);\r
1163                         \r
1164                         int q1 = timeNum / 60;\r
1165                         int sec = timeNum - 60 * q1;\r
1166                         int q2 = q1 / 60;\r
1167                         int minute = q1 - 60 * q2;\r
1168                         int q3 = q2 / 24;\r
1169                         int hr = q2 - 24 * q3;\r
1170                         int q4 = q3 / 32;\r
1171                         int day = q3 - 32 * q4;\r
1172                         int q5 = q4 / 13;\r
1173                         int mon = q4 - 13 * q5;\r
1174                         int year = 2000 + q5;\r
1175                 \r
1176                         timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);\r
1177                 }\r
1178                 \r
1179                 if (name.length() >= 9) {\r
1180                         region = name.substr(7, 2);\r
1181                 \r
1182                         string xyNum = name.substr(9);\r
1183                         unsigned int myXy = m->fromBase36(xyNum);\r
1184                         int x = myXy >> 12;\r
1185                         int y = myXy & 4095;\r
1186                 \r
1187                         xy = toString(x) + "_" + toString(y);\r
1188                 }\r
1189                 \r
1190                 return 0;\r
1191         }\r
1192         catch(exception& e) {\r
1193                 m->errorOut(e, "SffInfoCommand", "decodeName");\r
1194                 exit(1);\r
1195         }\r
1196 }\r
1197 //**********************************************************************************************************************\r
1198 int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {\r
1199         try {\r
1200         \r
1201                 out << "Common Header:\nMagic Number: " << header.magicNumber << endl;\r
1202                 out << "Version: " << header.version << endl;\r
1203                 out << "Index Offset: " << header.indexOffset << endl;\r
1204                 out << "Index Length: " << header.indexLength << endl;\r
1205                 out << "Number of Reads: " << header.numReads << endl;\r
1206                 out << "Header Length: " << header.headerLength << endl;\r
1207                 out << "Key Length: " << header.keyLength << endl;\r
1208                 out << "Number of Flows: " << header.numFlowsPerRead << endl;\r
1209                 out << "Format Code: " << header.flogramFormatCode << endl;\r
1210                 out << "Flow Chars: " << header.flowChars << endl;\r
1211                 out << "Key Sequence: " << header.keySequence << endl << endl;\r
1212                         \r
1213                 return 0;\r
1214         }\r
1215         catch(exception& e) {\r
1216                 m->errorOut(e, "SffInfoCommand", "printCommonHeader");\r
1217                 exit(1);\r
1218         }\r
1219 }\r
1220 //**********************************************************************************************************************\r
1221 int SffInfoCommand::printHeader(ofstream& out, Header& header) {\r
1222         try {\r
1223                 \r
1224                 out << ">" << header.name << endl;\r
1225                 out << "Run Prefix: " << header.timestamp << endl;\r
1226                 out << "Region #:  " << header.region << endl;\r
1227                 out << "XY Location: " << header.xy << endl << endl;\r
1228                 \r
1229                 out << "Run Name:  " << endl;\r
1230                 out << "Analysis Name:  " << endl;\r
1231                 out << "Full Path: " << endl << endl;\r
1232                 \r
1233                 out << "Read Header Len: " << header.headerLength << endl;\r
1234                 out << "Name Length: " << header.nameLength << endl;\r
1235                 out << "# of Bases: " << header.numBases << endl;\r
1236                 out << "Clip Qual Left: " << header.clipQualLeft << endl;\r
1237                 out << "Clip Qual Right: " << header.clipQualRight << endl;\r
1238                 out << "Clip Adap Left: " << header.clipAdapterLeft << endl;\r
1239                 out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;\r
1240                 \r
1241                 return 0;\r
1242         }\r
1243         catch(exception& e) {\r
1244                 m->errorOut(e, "SffInfoCommand", "printHeader");\r
1245                 exit(1);\r
1246         }\r
1247 }\r
1248 //**********************************************************************************************************************\r
1249 bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {\r
1250         try {\r
1251         bool okay = true;\r
1252         string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";\r
1253         \r
1254         if (header.clipQualLeft > read.bases.length()) {\r
1255             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
1256         }\r
1257         if (header.clipQualRight > read.bases.length()) {\r
1258             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
1259         }\r
1260         if (header.clipQualLeft > read.qualScores.size()) {\r
1261             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
1262         }\r
1263         if (header.clipQualRight > read.qualScores.size()) {\r
1264             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
1265         }\r
1266         \r
1267         if (okay == false) {\r
1268             m->mothurOut(message); m->mothurOutEndLine();\r
1269         }\r
1270         \r
1271                 return okay;\r
1272         }\r
1273         catch(exception& e) {\r
1274                 m->errorOut(e, "SffInfoCommand", "sanityCheck");\r
1275                 exit(1);\r
1276         }\r
1277 }\r
1278 //**********************************************************************************************************************\r
1279 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {\r
1280         try {\r
1281                 out << "Flowgram: ";\r
1282                 for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }\r
1283                 \r
1284                 out << endl <<  "Flow Indexes: ";\r
1285                 int sum = 0;\r
1286                 for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }\r
1287                 \r
1288                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1289         int endValue = header.clipQualRight;\r
1290                 if(endValue == 0){      endValue = read.bases.length(); }\r
1291                 for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }\r
1292                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }\r
1293                 for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }\r
1294                 \r
1295                 out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";\r
1296                 for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
1297         \r
1298                 \r
1299                 out << endl << endl;\r
1300                 \r
1301                 return 0;\r
1302         }\r
1303         catch(exception& e) {\r
1304                 m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");\r
1305                 exit(1);\r
1306         }\r
1307 }\r
1308 //**********************************************************************************************************************\r
1309 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {\r
1310         try {\r
1311                 string seq = read.bases;\r
1312                 \r
1313         if (trim) {\r
1314                         if(header.clipQualRight < header.clipQualLeft){\r
1315                                 if (header.clipQualRight == 0) { //don't trim right\r
1316                     seq = seq.substr(header.clipQualLeft-1);\r
1317                 }else {\r
1318                     seq = "NNNN";\r
1319                 }\r
1320                         }\r
1321                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1322                                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
1323                         }\r
1324                         else {\r
1325                                 seq = seq.substr(header.clipQualLeft-1);\r
1326                         }\r
1327                 }else{\r
1328                         //if you wanted the sfftxt then you already converted the bases to the right case\r
1329                         if (!sfftxt) {\r
1330                 int endValue = header.clipQualRight;\r
1331                                 //make the bases you want to clip lowercase and the bases you want to keep upper case\r
1332                                 if(endValue == 0){      endValue = seq.length();        }\r
1333                                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
1334                                 for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
1335                                 for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
1336                         }\r
1337                 }\r
1338                 \r
1339                 out << ">" << header.name  << " xy=" << header.xy << endl;\r
1340                 out << seq << endl;\r
1341                 \r
1342                 return 0;\r
1343         }\r
1344         catch(exception& e) {\r
1345                 m->errorOut(e, "SffInfoCommand", "printFastaSeqData");\r
1346                 exit(1);\r
1347         }\r
1348 }\r
1349 \r
1350 //**********************************************************************************************************************\r
1351 int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {\r
1352         try {\r
1353                 \r
1354                 if (trim) {\r
1355                         if(header.clipQualRight < header.clipQualLeft){\r
1356                 if (header.clipQualRight == 0) { //don't trim right\r
1357                     out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;\r
1358                     for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';       }       \r
1359                 }else {\r
1360                     out << ">" << header.name << " xy=" << header.xy << endl;\r
1361                     out << "0\t0\t0\t0";\r
1362                 }\r
1363                         }\r
1364                         else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
1365                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
1366                                 for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }\r
1367                         }\r
1368                         else{\r
1369                                 out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
1370                                 for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       \r
1371                         }\r
1372                 }else{\r
1373                         out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;\r
1374                         for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
1375                 }\r
1376                 \r
1377                 out << endl;\r
1378                 \r
1379                 return 0;\r
1380         }\r
1381         catch(exception& e) {\r
1382                 m->errorOut(e, "SffInfoCommand", "printQualSeqData");\r
1383                 exit(1);\r
1384         }\r
1385 }\r
1386 \r
1387 //**********************************************************************************************************************\r
1388 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {\r
1389         try {\r
1390         \r
1391         int endValue = header.clipQualRight;\r
1392         if (header.clipQualRight == 0) {\r
1393             endValue = read.flowIndex.size();\r
1394             if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }\r
1395         }\r
1396         if(endValue > header.clipQualLeft){\r
1397             \r
1398             int rightIndex = 0;\r
1399             for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];      }\r
1400             \r
1401             out << header.name << ' ' << rightIndex;\r
1402             for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }\r
1403             out << endl;\r
1404         }\r
1405                 \r
1406                 \r
1407                 return 0;\r
1408         }\r
1409         catch(exception& e) {\r
1410                 m->errorOut(e, "SffInfoCommand", "printFlowSeqData");\r
1411                 exit(1);\r
1412         }\r
1413 }\r
1414 //**********************************************************************************************************************\r
1415 int SffInfoCommand::readAccnosFile(string filename) {\r
1416         try {\r
1417                 //remove old names\r
1418                 seqNames.clear();\r
1419                 \r
1420                 ifstream in;\r
1421                 m->openInputFile(filename, in);\r
1422                 string name;\r
1423                 \r
1424                 while(!in.eof()){\r
1425                         in >> name; m->gobble(in);\r
1426                                                 \r
1427                         seqNames.insert(name);\r
1428                         \r
1429                         if (m->control_pressed) { seqNames.clear(); break; }\r
1430                 }\r
1431                 in.close();             \r
1432                 \r
1433                 return 0;\r
1434         }\r
1435         catch(exception& e) {\r
1436                 m->errorOut(e, "SffInfoCommand", "readAccnosFile");\r
1437                 exit(1);\r
1438         }\r
1439 }\r
1440 //**********************************************************************************************************************\r
1441 int SffInfoCommand::parseSffTxt() {\r
1442         try {\r
1443                 \r
1444                 ifstream inSFF;\r
1445                 m->openInputFile(sfftxtFilename, inSFF);\r
1446                 \r
1447                 if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }\r
1448                 \r
1449                 //output file names\r
1450                 ofstream outFasta, outQual, outFlow;\r
1451                 string outFastaFileName, outQualFileName;\r
1452                 string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));\r
1453                 if (fileRoot.length() > 0) {\r
1454                         //rip off last .\r
1455                         fileRoot = fileRoot.substr(0, fileRoot.length()-1);\r
1456                         fileRoot = m->getRootName(fileRoot);\r
1457                 }\r
1458                 \r
1459         map<string, string> variables; \r
1460                 variables["[filename]"] = fileRoot;\r
1461                 string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
1462                 string outFlowFileName = getOutputFileName("flow",variables);\r
1463                 if (!trim) { variables["[tag]"] = "raw"; }\r
1464                 outFastaFileName = getOutputFileName("fasta",variables);\r
1465         outQualFileName = getOutputFileName("qfile",variables);\r
1466                 \r
1467                 if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
1468                 if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
1469                 if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
1470                 \r
1471                 //read common header\r
1472                 string commonHeader = m->getline(inSFF);\r
1473                 string magicNumber = m->getline(inSFF); \r
1474                 string version = m->getline(inSFF);\r
1475                 string indexOffset = m->getline(inSFF);\r
1476                 string indexLength = m->getline(inSFF);\r
1477                 int numReads = parseHeaderLineToInt(inSFF);\r
1478                 string headerLength = m->getline(inSFF);\r
1479                 string keyLength = m->getline(inSFF);\r
1480                 int numFlows = parseHeaderLineToInt(inSFF);\r
1481                 string flowgramCode = m->getline(inSFF);\r
1482                 string flowChars = m->getline(inSFF);\r
1483                 string keySequence = m->getline(inSFF);\r
1484                 m->gobble(inSFF);\r
1485                 \r
1486                 string seqName;\r
1487                 \r
1488                 if (flow)       {       outFlow << numFlows << endl;    }\r
1489                 \r
1490                 for(int i=0;i<numReads;i++){\r
1491                         \r
1492                         //sanity check\r
1493                         if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }\r
1494                         \r
1495                         Header header;\r
1496                         \r
1497                         //parse read header\r
1498                         inSFF >> seqName;\r
1499                         seqName = seqName.substr(1);\r
1500                         m->gobble(inSFF);\r
1501                         header.name = seqName;\r
1502                         \r
1503                         string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;\r
1504                         string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;\r
1505                         string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;\r
1506                         m->gobble(inSFF);\r
1507                                 \r
1508                         string runName = parseHeaderLineToString(inSFF);\r
1509                         string analysisName = parseHeaderLineToString(inSFF);\r
1510                         string fullPath = parseHeaderLineToString(inSFF);\r
1511                         m->gobble(inSFF);\r
1512                         \r
1513                         string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);\r
1514                         string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);\r
1515                         int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;\r
1516                         string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);\r
1517                         int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;\r
1518                         string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);\r
1519                         string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);\r
1520                         m->gobble(inSFF);\r
1521                                 \r
1522                         seqRead read;\r
1523                         \r
1524                         //parse read\r
1525                         vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;\r
1526                         vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); \r
1527                         \r
1528                         //adjust for print\r
1529                         vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);\r
1530                         for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }\r
1531                         read.flowIndex = flowIndicesAdjusted;\r
1532                         \r
1533                         string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;\r
1534                         vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;\r
1535                         m->gobble(inSFF);\r
1536                                         \r
1537                         //if you have provided an accosfile and this seq is not in it, then dont print\r
1538                         bool print = true;\r
1539                         if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }\r
1540                         \r
1541                         //print \r
1542                         if (print) {\r
1543                                 if (fasta)      {       printFastaSeqData(outFasta, read, header);      }\r
1544                                 if (qual)       {       printQualSeqData(outQual, read, header);        }\r
1545                                 if (flow)       {       printFlowSeqData(outFlow, read, header);        }\r
1546                         }\r
1547                         \r
1548                         //report progress\r
1549                         if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }\r
1550                         \r
1551                         if (m->control_pressed) {  break;  }\r
1552                 }\r
1553                 \r
1554                 //report progress\r
1555                 if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }\r
1556                 \r
1557                 inSFF.close();\r
1558                 \r
1559                 if (fasta)      {  outFasta.close();    }\r
1560                 if (qual)       {  outQual.close();             }\r
1561                 if (flow)       {  outFlow.close();             }\r
1562                 \r
1563                 return 0;\r
1564         }\r
1565         catch(exception& e) {\r
1566                 m->errorOut(e, "SffInfoCommand", "parseSffTxt");\r
1567                 exit(1);\r
1568         }\r
1569 }\r
1570 //**********************************************************************************************************************\r
1571 \r
1572 int SffInfoCommand::parseHeaderLineToInt(ifstream& file){\r
1573         try {\r
1574                 int number;\r
1575                 \r
1576                 while (!file.eof())     {\r
1577                         \r
1578                         char c = file.get(); \r
1579                         if (c == ':'){\r
1580                                 file >> number;\r
1581                                 break;\r
1582                         }\r
1583                         \r
1584                 }\r
1585                 m->gobble(file);\r
1586                 return number;\r
1587         }\r
1588         catch(exception& e) {\r
1589                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");\r
1590                 exit(1);\r
1591         }\r
1592         \r
1593 }\r
1594 \r
1595 //**********************************************************************************************************************\r
1596 \r
1597 string SffInfoCommand::parseHeaderLineToString(ifstream& file){\r
1598         try {\r
1599                 string text;\r
1600                 \r
1601                 while (!file.eof())     {\r
1602                         char c = file.get(); \r
1603                         \r
1604                         if (c == ':'){\r
1605                                 //m->gobble(file);\r
1606                                 //text = m->getline(file);      \r
1607                                 file >> text;\r
1608                                 break;\r
1609                         }\r
1610                 }\r
1611                 m->gobble(file);\r
1612                 \r
1613                 return text;\r
1614         }\r
1615         catch(exception& e) {\r
1616                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");\r
1617                 exit(1);\r
1618         }\r
1619 }\r
1620 \r
1621 //**********************************************************************************************************************\r
1622 \r
1623 vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){\r
1624         try {\r
1625                 vector<unsigned short> floatVector(length);\r
1626                 \r
1627                 while (!file.eof())     {\r
1628                         char c = file.get(); \r
1629                         if (c == ':'){\r
1630                                 float temp;\r
1631                                 for(int i=0;i<length;i++){\r
1632                                         file >> temp;\r
1633                                         floatVector[i] = temp * 100;\r
1634                                 }\r
1635                                 break;\r
1636                         }\r
1637                 }\r
1638                 m->gobble(file);        \r
1639                 return floatVector;\r
1640         }\r
1641         catch(exception& e) {\r
1642                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");\r
1643                 exit(1);\r
1644         }\r
1645 }\r
1646 \r
1647 //**********************************************************************************************************************\r
1648 \r
1649 vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){\r
1650         try {\r
1651                 vector<unsigned int> intVector(length);\r
1652                 \r
1653                 while (!file.eof())     {\r
1654                         char c = file.get(); \r
1655                         if (c == ':'){\r
1656                                 for(int i=0;i<length;i++){\r
1657                                         file >> intVector[i];\r
1658                                 }\r
1659                                 break;\r
1660                         }\r
1661                 }\r
1662                 m->gobble(file);        \r
1663                 return intVector;\r
1664         }\r
1665         catch(exception& e) {\r
1666                 m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");\r
1667                 exit(1);\r
1668         }\r
1669 }\r
1670 //***************************************************************************************************************\r
1671 \r
1672 bool SffInfoCommand::readOligos(string oligoFile){\r
1673         try {\r
1674         filehandles.clear();\r
1675         numSplitReads.clear();\r
1676         filehandlesHeaders.clear();\r
1677         \r
1678                 ifstream inOligos;\r
1679                 m->openInputFile(oligoFile, inOligos);\r
1680                 \r
1681                 string type, oligo, group;\r
1682         \r
1683                 int indexPrimer = 0;\r
1684                 int indexBarcode = 0;\r
1685                 \r
1686                 while(!inOligos.eof()){\r
1687             \r
1688                         inOligos >> type; \r
1689             \r
1690                         if(type[0] == '#'){\r
1691                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there\r
1692                                 m->gobble(inOligos);\r
1693                         }\r
1694                         else{\r
1695                                 m->gobble(inOligos);\r
1696                                 //make type case insensitive\r
1697                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }\r
1698                                 \r
1699                                 inOligos >> oligo;\r
1700                                 \r
1701                                 for(int i=0;i<oligo.length();i++){\r
1702                                         oligo[i] = toupper(oligo[i]);\r
1703                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }\r
1704                                 }\r
1705                                 \r
1706                                 if(type == "FORWARD"){\r
1707                                         group = "";\r
1708                                         \r
1709                                         // get rest of line in case there is a primer name\r
1710                                         while (!inOligos.eof()) {       \r
1711                                                 char c = inOligos.get(); \r
1712                                                 if (c == 10 || c == 13 || c == -1){     break;  }\r
1713                                                 else if (c == 32 || c == 9){;} //space or tab\r
1714                                                 else {  group += c;  }\r
1715                                         } \r
1716                                         \r
1717                                         //check for repeat barcodes\r
1718                                         map<string, int>::iterator itPrime = primers.find(oligo);\r
1719                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
1720                                         \r
1721                                         primers[oligo]=indexPrimer; indexPrimer++;              \r
1722                                         primerNameVector.push_back(group);\r
1723                                 }else if(type == "REVERSE"){\r
1724                                         //Sequence oligoRC("reverse", oligo);\r
1725                                         //oligoRC.reverseComplement();\r
1726                     string oligoRC = reverseOligo(oligo);\r
1727                                         revPrimer.push_back(oligoRC);\r
1728                                 }\r
1729                                 else if(type == "BARCODE"){\r
1730                                         inOligos >> group;\r
1731                                         \r
1732                                         //check for repeat barcodes\r
1733                                         map<string, int>::iterator itBar = barcodes.find(oligo);\r
1734                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
1735                     \r
1736                                         barcodes[oligo]=indexBarcode; indexBarcode++;\r
1737                                         barcodeNameVector.push_back(group);\r
1738                                 }else if(type == "LINKER"){\r
1739                                         linker.push_back(oligo);\r
1740                                 }else if(type == "SPACER"){\r
1741                                         spacer.push_back(oligo);\r
1742                                 }\r
1743                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
1744                         }\r
1745                         m->gobble(inOligos);\r
1746                 }       \r
1747                 inOligos.close();\r
1748                 \r
1749                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }\r
1750                 \r
1751                 //add in potential combos\r
1752                 if(barcodeNameVector.size() == 0){\r
1753                         barcodes[""] = 0;\r
1754                         barcodeNameVector.push_back("");                        \r
1755                 }\r
1756                 \r
1757                 if(primerNameVector.size() == 0){\r
1758                         primers[""] = 0;\r
1759                         primerNameVector.push_back("");                 \r
1760                 }\r
1761                 \r
1762                 filehandles.resize(barcodeNameVector.size());\r
1763         for (int i = 0; i < filehandles.size(); i++) {\r
1764             for (int j = 0; j < primerNameVector.size(); j++) {\r
1765                 ofstream* temp;\r
1766                 map<string, ofstream*> myMap; myMap[""] = temp;\r
1767                 filehandles[i].push_back(myMap);\r
1768             }\r
1769         }\r
1770                         \r
1771                 if(split > 1){\r
1772                         set<string> uniqueNames; //used to cleanup outputFileNames\r
1773                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
1774                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
1775                                         \r
1776                                         string primerName = primerNameVector[itPrimer->second];\r
1777                                         string barcodeName = barcodeNameVector[itBar->second];\r
1778                                         \r
1779                                         string comboGroupName = "";\r
1780                                         string fastaFileName = "";\r
1781                                         string qualFileName = "";\r
1782                                         string nameFileName = "";\r
1783                                         \r
1784                                         if(primerName == ""){\r
1785                                                 comboGroupName = barcodeNameVector[itBar->second];\r
1786                                         }\r
1787                                         else{\r
1788                                                 if(barcodeName == ""){\r
1789                                                         comboGroupName = primerNameVector[itPrimer->second];\r
1790                                                 }\r
1791                                                 else{\r
1792                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
1793                                                 }\r
1794                                         }\r
1795                                         \r
1796                                         ofstream* temp = new ofstream;\r
1797                     map<string, string> variables; \r
1798                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1799                     variables["[group]"] = comboGroupName;\r
1800                                         string thisFilename = getOutputFileName("sff",variables);\r
1801                                         if (uniqueNames.count(thisFilename) == 0) {\r
1802                                                 outputNames.push_back(thisFilename);\r
1803                                                 outputTypes["sff"].push_back(thisFilename);\r
1804                                                 uniqueNames.insert(thisFilename);\r
1805                                         }\r
1806                                         \r
1807                     map<string, ofstream*> myMap; myMap[thisFilename] = temp;\r
1808                     m->openOutputFileBinary(thisFilename, *(temp));\r
1809                                         filehandles[itBar->second][itPrimer->second] = myMap;\r
1810                     map<string, ofstream*>::iterator itOfstream = filehandles[itBar->second][itPrimer->second].find("");\r
1811                     if (itOfstream != filehandles[itBar->second][itPrimer->second].end()) { filehandles[itBar->second][itPrimer->second].erase(itOfstream); } //remove blank entry so we dont mess with .begin() above. code above assumes only 1 file name in the map\r
1812                                 }\r
1813                         }\r
1814                 }\r
1815                 numFPrimers = primers.size();\r
1816         numLinkers = linker.size();\r
1817         numSpacers = spacer.size();\r
1818         map<string, string> variables; \r
1819         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
1820         variables["[group]"] = "scrap";\r
1821                 noMatchFile = getOutputFileName("sff",variables);\r
1822         m->mothurRemove(noMatchFile);\r
1823         numNoMatch = 0;\r
1824         \r
1825                 bool allBlank = true;\r
1826                 for (int i = 0; i < barcodeNameVector.size(); i++) {\r
1827                         if (barcodeNameVector[i] != "") {\r
1828                                 allBlank = false;\r
1829                                 break;\r
1830                         }\r
1831                 }\r
1832                 for (int i = 0; i < primerNameVector.size(); i++) {\r
1833                         if (primerNameVector[i] != "") {\r
1834                                 allBlank = false;\r
1835                                 break;\r
1836                         }\r
1837                 }\r
1838                 \r
1839         filehandlesHeaders.resize(filehandles.size());\r
1840         numSplitReads.resize(filehandles.size());\r
1841         for (int i = 0; i < filehandles.size(); i++) { \r
1842             numSplitReads[i].resize(filehandles[i].size(), 0); \r
1843             for (int j = 0; j < filehandles[i].size(); j++) {\r
1844                 ofstream* temp = new ofstream;\r
1845                 map<string, ofstream* > myMap;\r
1846                 string thisHeader = (filehandles[i][j].begin())->first+"headers";\r
1847                 myMap[thisHeader] = temp;\r
1848                 m->openOutputFileBinary(thisHeader, *(temp));\r
1849                 filehandlesHeaders[i].push_back(myMap);\r
1850             }\r
1851         }\r
1852         \r
1853                 if (allBlank) {\r
1854                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();\r
1855                         split = 1;\r
1856                         return false;\r
1857                 }\r
1858                 \r
1859                 return true;\r
1860                 \r
1861         }\r
1862         catch(exception& e) {\r
1863                 m->errorOut(e, "SffInfoCommand", "readOligos");\r
1864                 exit(1);\r
1865         }\r
1866 }\r
1867 //********************************************************************/\r
1868 string SffInfoCommand::reverseOligo(string oligo){\r
1869         try {\r
1870         string reverse = "";\r
1871         \r
1872         for(int i=oligo.length()-1;i>=0;i--){\r
1873             \r
1874             if(oligo[i] == 'A')         {       reverse += 'T'; }\r
1875             else if(oligo[i] == 'T'){   reverse += 'A'; }\r
1876             else if(oligo[i] == 'U'){   reverse += 'A'; }\r
1877             \r
1878             else if(oligo[i] == 'G'){   reverse += 'C'; }\r
1879             else if(oligo[i] == 'C'){   reverse += 'G'; }\r
1880             \r
1881             else if(oligo[i] == 'R'){   reverse += 'Y'; }\r
1882             else if(oligo[i] == 'Y'){   reverse += 'R'; }\r
1883             \r
1884             else if(oligo[i] == 'M'){   reverse += 'K'; }\r
1885             else if(oligo[i] == 'K'){   reverse += 'M'; }\r
1886             \r
1887             else if(oligo[i] == 'W'){   reverse += 'W'; }\r
1888             else if(oligo[i] == 'S'){   reverse += 'S'; }\r
1889             \r
1890             else if(oligo[i] == 'B'){   reverse += 'V'; }\r
1891             else if(oligo[i] == 'V'){   reverse += 'B'; }\r
1892             \r
1893             else if(oligo[i] == 'D'){   reverse += 'H'; }\r
1894             else if(oligo[i] == 'H'){   reverse += 'D'; }\r
1895             \r
1896             else                                                {       reverse += 'N'; }\r
1897         }\r
1898         \r
1899         \r
1900         return reverse;\r
1901     }\r
1902         catch(exception& e) {\r
1903                 m->errorOut(e, "SffInfoCommand", "reverseOligo");\r
1904                 exit(1);\r
1905         }\r
1906 }\r
1907 \r
1908 //**********************************************************************************************************************\r
1909 \r
1910 \r
1911                                 \r
1912                                 \r