]> git.donarmstrong.com Git - mothur.git/blob - mergesfffilecommand.cpp
fixes while testing 1.33.0
[mothur.git] / mergesfffilecommand.cpp
1 //
2 //  mergesfffilecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/31/14.
6 //  Copyright (c) 2014 Schloss Lab. All rights reserved.
7 //
8
9 #include "mergesfffilecommand.h"
10 #include "endiannessmacros.h"
11
12 //**********************************************************************************************************************
13 vector<string> MergeSfffilesCommand::setParameters(){
14         try {
15                 CommandParameter psff("sff", "InputTypes", "", "", "sffFile", "sffFile", "none","sff",false,false); parameters.push_back(psff);
16         CommandParameter pfile("file", "InputTypes", "", "", "sffFile", "sffFile", "none","sff",false,false); parameters.push_back(pfile);
17                 CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput);
18         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
19                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
20                 
21                 vector<string> myArray;
22                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
23                 return myArray;
24         }
25         catch(exception& e) {
26                 m->errorOut(e, "MergeSfffilesCommand", "setParameters");
27                 exit(1);
28         }
29 }
30 //**********************************************************************************************************************
31 string MergeSfffilesCommand::getHelpString(){
32         try {
33                 string helpString = "";
34                 helpString += "The merge.sfffiles command reads a sff file or a file containing a list of sff files and merges the individual files into a single sff file. \n";
35                 helpString += "The merge.sfffiles command parameters are sff, file and output. sff or file is required. \n";
36                 helpString += "The sff parameter allows you to enter the sff list of sff files separated by -'s.\n";
37                 helpString += "The file parameter allows you to provide a file containing a list of sff files to merge.  \n";
38         helpString += "The output parameter allows you to provide an output filename.  \n";
39                 helpString += "Example sffinfo(sff=mySffFile.sff-mySecond.sff).\n";
40                 helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";
41                 return helpString;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "MergeSfffilesCommand", "getHelpString");
45                 exit(1);
46         }
47 }
48
49 //**********************************************************************************************************************
50 string MergeSfffilesCommand::getOutputPattern(string type) {
51     try {
52         string pattern = "";
53         
54         if (type == "sff")            {   pattern =  "[filename],";   }
55         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
56         
57         return pattern;
58     }
59     catch(exception& e) {
60         m->errorOut(e, "MergeSfffilesCommand", "getOutputPattern");
61         exit(1);
62     }
63 }
64 //**********************************************************************************************************************
65 MergeSfffilesCommand::MergeSfffilesCommand(){
66         try {
67                 abort = true; calledHelp = true;
68                 setParameters();
69                 vector<string> tempOutNames;
70         outputTypes["sff"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "MergeSfffilesCommand", "MergeSfffilesCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78
// Option-string constructor: parses and validates the options given to the command.
//   option - "help", "citation", or the option string handed to OptionParser.
// Problems with the options are reported through m->mothurOut and recorded by
// setting abort; help and citation requests also set calledHelp. On success the
// members filenames (from sff) or file, plus outputFile and outputDir, are filled in.
79 MergeSfffilesCommand::MergeSfffilesCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         //valid parameters for this command
89                         vector<string> myArray = setParameters();
90                         
91                         OptionParser parser(option);
92                         map<string, string> parameters = parser.getParameters();
93             map<string,string>::iterator it;
94                         
95                         ValidParameters validParameter;
96                         //check to make sure all parameters are valid for command
//note: the loop declares its own iterator, which hides the 'it' declared above
97                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103             outputTypes["sff"] = tempOutNames;
104                         
105                         //if the user changes the output directory command factory will send this info to us in the outputdir parameter
106                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
107                         
108                         //if the user changes the input directory command factory will send this info to us in the inputdir parameter
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
110             else {
111                 it = parameters.find("file");
112                                 //user has given a file parameter
113                                 if(it != parameters.end()){
114                                         string path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
117                                 }
118             }
119             
//the sff parameter is a dash-separated list of sff files; each entry is checked below
120                         sffFilename = validParameter.validFile(parameters, "sff", false);
121                         if (sffFilename == "not found") { sffFilename = "";  }
122                         else {
123                                 m->splitAtDash(sffFilename, filenames);
124                                 
125                                 //go through files and make sure they are good, if not, then disregard them
126                                 for (int i = 0; i < filenames.size(); i++) {
127                                         bool ignore = false;
128                                         if (filenames[i] == "current") {
129                                                 filenames[i] = m->getSFFFile();
130                                                 if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }
131                                                 else {
132                                                         m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true;
133                                                         //erase from file list
134                                                         filenames.erase(filenames.begin()+i);
//step back so the element that moved into slot i is examined on the next pass
135                                                         i--;
136                                                 }
137                                         }
138                                         
139                                         if (!ignore) {
140                                                 if (inputDir != "") {
141                                                         string path = m->hasPath(filenames[i]);
142                                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                                         if (path == "") {       filenames[i] = inputDir + filenames[i];         }
144                                                 }
145                         
146                                                 ifstream in;
147                                                 int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
148                         
149                                                 //if you can't open it, try default location
150                                                 if (ableToOpen == 1) {
151                                                         if (m->getDefaultPath() != "") { //default path is set
152                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
153                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
154                                                                 ifstream in2;
155                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
156                                                                 in2.close();
//the entry is replaced by the attempted path whether or not the open succeeded
157                                                                 filenames[i] = tryPath;
158                                                         }
159                                                 }
160                                                 
161                                                 //if you still can't open it, try the output directory
162                                                 if (ableToOpen == 1) {
163                                                         if (m->getOutputDir() != "") { //output directory is set
164                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
165                                                                 m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
166                                                                 ifstream in2;
167                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
168                                                                 in2.close();
169                                                                 filenames[i] = tryPath;
170                                                         }
171                                                 }
172                                                 
173                                                 in.close();
174                                                 
175                                                 if (ableToOpen == 1) {
176                                                         m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
177                                                         //erase from file list
178                                                         filenames.erase(filenames.begin()+i);
179                                                         i--;
180                                                 }else { m->setSFFFile(filenames[i]); }
181                                         }
182                                 }
183                         }
184                         
//the file parameter names a file listing the sff files to merge
185                         file = validParameter.validFile(parameters, "file", true);
186                         if (file == "not open") {  abort = true; }
187                         else if (file == "not found") { file = "";  }
188             
//exactly one of file / sff must supply input
189             if ((file == "") && (filenames.size() == 0)) {
190                 m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true;
191             }
192             
193             if ((file != "") && (filenames.size() != 0)) { //both are given
194                 m->mothurOut("[ERROR]: cannot use file option and sff option at the same time, choose one."); m->mothurOutEndLine(); abort = true;
195             }
196             
//output is mandatory; when outputdir is set only the simple name of output is kept
197             outputFile = validParameter.validFile(parameters, "output", false);
198                         if (outputFile == "not found") { m->mothurOut("you must enter an output file name"); m->mothurOutEndLine();  abort=true;  }
199                         if (outputDir != "") { outputFile = outputDir + m->getSimpleName(outputFile);  }
200             
201                 }
202         }
203         catch(exception& e) {
204                 m->errorOut(e, "MergeSfffilesCommand", "MergeSfffilesCommand");
205                 exit(1);
206         }
207 }
208 //**********************************************************************************************************************
209 int MergeSfffilesCommand::execute(){
210         try {
211                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
212         
213         if (file != "") {
214             readFile();
215             if (outputDir == "") { outputDir = m->hasPath(file); }
216         }
217         ofstream out;
218         map<string, string> variables;
219         string thisOutputDir = outputDir;
220                 if (outputDir == "") {  thisOutputDir += m->hasPath(outputFile);  }
221         variables["[filename]"] = thisOutputDir + m->getSimpleName(outputFile);
222                 outputFile = getOutputFileName("sff",variables);
223         m->openOutputFile(outputFile, out);
224         outputNames.push_back(outputFile); outputTypes["sff"].push_back(outputFile);
225         outputFileHeader = outputFile + ".headers";
226         numTotalReads = 0;
227         
228                 for (int s = 0; s < filenames.size(); s++) {
229                         
230                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
231                         
232                         int start = time(NULL);
233                         
234             filenames[s] = m->getFullPathName(filenames[s]);
235                         m->mothurOut("\nMerging info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
236             
237                         int numReads = mergeSffInfo(filenames[s], out);
238             
239                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to merge " + toString(numReads) + ".\n");
240                 }
241         out.close();
242         
243         //create new common header and add to merged file
244         adjustCommonHeader();
245
246                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
247                 
248                 //set sff file as new current sff file
249                 string current = "";
250                 itTypes = outputTypes.find("sff");
251                 if (itTypes != outputTypes.end()) {
252                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSFFFile(current); }
253                 }
254                 
255                 //report output filenames
256                 m->mothurOutEndLine();
257                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
258                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
259                 m->mothurOutEndLine();
260         
261                 return 0;
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "MergeSfffilesCommand", "execute");
265                 exit(1);
266         }
267 }
268 //**********************************************************************************************************************
269 int MergeSfffilesCommand::mergeSffInfo(string input, ofstream& out){
270         try {
271                 currentFileName = input;
272         
273                 ifstream in;
274                 m->openInputFileBinary(input, in);
275                 
276                 CommonHeader header;
277                 readCommonHeader(in, header);
278         
279                 int count = 0;
280                 
281                 //check magic number and version
282                 if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
283                 if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
284                 
285         //save for adjustHeader sanity check
286         commonHeaders.push_back(header);
287         
288                 //read through the sff file
289                 while (!in.eof()) {
290             
291                         //read data
292                         seqRead read;  Header readheader;
293             readSeqData(in, read, header.numFlowsPerRead, readheader, out);
294             
295             bool okay = sanityCheck(readheader, read);
296             if (!okay) { break; }
297                         
298                         count++;
299             
300                         //report progress
301                         if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
302             
303                         if (m->control_pressed) { count = 0; break;   }
304                         
305                         if (count >= header.numReads) { break; }
306                 }
307                 
308                 //report progress
309                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
310                 
311                 in.close();
312                 
313                 return count;
314         }
315         catch(exception& e) {
316                 m->errorOut(e, "MergeSfffilesCommand", "mergeSffInfo");
317                 exit(1);
318         }
319 }
320 //**********************************************************************************************************************
321 int MergeSfffilesCommand::readCommonHeader(ifstream& in, CommonHeader& header){
322         try {
323         
324                 if (!in.eof()) {
325             
326                         //read magic number
327                         char buffer[4];
328                         in.read(buffer, 4);
329                         header.magicNumber = be_int4(*(unsigned int *)(&buffer));
330             
331                         //read version
332                         char buffer9[4];
333                         in.read(buffer9, 4);
334                         header.version = "";
335                         for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }
336             
337                         //read offset
338                         char buffer2 [8];
339                         in.read(buffer2, 8);
340                         header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));
341                         
342                         //read index length
343                         char buffer3 [4];
344                         in.read(buffer3, 4);
345                         header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
346             
347                         //read num reads
348                         char buffer4 [4];
349                         in.read(buffer4, 4);
350                         header.numReads =  be_int4(*(unsigned int *)(&buffer4));
351             
352             if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }
353             
354                         //read header length
355                         char buffer5 [2];
356                         in.read(buffer5, 2);
357                         header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
358             
359                         //read key length
360                         char buffer6 [2];
361                         in.read(buffer6, 2);
362                         header.keyLength = be_int2(*(unsigned short *)(&buffer6));
363                         
364                         //read number of flow reads
365                         char buffer7 [2];
366                         in.read(buffer7, 2);
367                         header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
368             
369                         //read format code
370                         char buffer8 [1];
371                         in.read(buffer8, 1);
372                         header.flogramFormatCode = (int)(buffer8[0]);
373                         
374                         //read flow chars
375                         char* tempBuffer = new char[header.numFlowsPerRead];
376                         in.read(&(*tempBuffer), header.numFlowsPerRead);
377                         header.flowChars = tempBuffer;
378                         if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
379                         delete[] tempBuffer;
380                         
381                         //read key
382                         char* tempBuffer2 = new char[header.keyLength];
383                         in.read(&(*tempBuffer2), header.keyLength);
384                         header.keySequence = tempBuffer2;
385                         if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
386                         delete[] tempBuffer2;
387                         
388                         /* Pad to 8 chars */
389                         unsigned long long spotInFile = in.tellg();
390                         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
391                         in.seekg(spot);
392             
393         }else{
394                         m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
395                 }
396         
397                 return 0;
398         
399         }
400         catch(exception& e) {
401                 m->errorOut(e, "MergeSfffilesCommand", "readCommonHeader");
402                 exit(1);
403         }
404 }
405 //**********************************************************************************************************************
406 int MergeSfffilesCommand::adjustCommonHeader(){
407         try {
408         //sanity check
409         bool okay = true;
410         if (commonHeaders.size() != 0) {
411             unsigned int magicN = commonHeaders[0].magicNumber;
412             string version = commonHeaders[0].version;
413             unsigned short headerLength = commonHeaders[0].headerLength;
414             unsigned short keyLength = commonHeaders[0].keyLength;
415             unsigned short numFlows = commonHeaders[0].numFlowsPerRead;
416             int flowCode = commonHeaders[0].flogramFormatCode;
417             string flowChars = commonHeaders[0].flowChars;
418             string keySeq = commonHeaders[0].keySequence;
419             
420             for (int i = 1; i < commonHeaders.size(); i++) {
421                 if (commonHeaders[i].magicNumber != magicN)             { okay = false;  m->mothurOut("[ERROR]: merge issue with common headers. Magic numbers do not match. " + filenames[0] + " magic number is " + toString(commonHeaders[0].magicNumber) + ", but " + filenames[i] + " magic number is " + toString(commonHeaders[i].magicNumber) + ".\n");  }
422                 if (commonHeaders[i].version != version)                { okay = false;   m->mothurOut("[ERROR]: merge issue with common headers. Versions do not match. " + filenames[0] + " version is " + commonHeaders[0].version + ", but " + filenames[i] + " version is " + commonHeaders[i].version + ".\n");     }
423                 if (commonHeaders[i].headerLength != headerLength)      { okay = false;    m->mothurOut("[ERROR]: merge issue with common headers. Header lengths do not match. " + filenames[0] + " header length is " + toString(commonHeaders[0].headerLength) + ", but " + filenames[i] + " header length is " + toString(commonHeaders[i].headerLength) + ".\n");    }
424                 if (commonHeaders[i].keyLength != keyLength)            { okay = false;  m->mothurOut("[ERROR]: merge issue with common headers. Key Lengths do not match. " + filenames[0] + " Key length is " + toString(commonHeaders[0].keyLength) + ", but " + filenames[i] + " key length is " + toString(commonHeaders[i].keyLength) + ".\n");    }
425                 if (commonHeaders[i].numFlowsPerRead != numFlows)       { okay = false;   m->mothurOut("[ERROR]: merge issue with common headers. Number of flows per read do not match. " + filenames[0] + " number of flows is " + toString(commonHeaders[0].numFlowsPerRead) + ", but " + filenames[i] + " number of flows is " + toString(commonHeaders[i].numFlowsPerRead) + ".\n");     }
426                 if (commonHeaders[i].flogramFormatCode != flowCode)     { okay = false;    m->mothurOut("[ERROR]: merge issue with common headers. Flow format codes do not match. " + filenames[0] + " Flow format code is " + toString(commonHeaders[0].flogramFormatCode) + ", but " + filenames[i] + " flow format code is " + toString(commonHeaders[i].flogramFormatCode) + ".\n");    }
427                 if (commonHeaders[i].flowChars != flowChars)            { okay = false;   m->mothurOut("[ERROR]: merge issue with common headers. Flow characters do not match. " + filenames[0] + " Flow characters are " + commonHeaders[0].flowChars + ", but " + filenames[i] + " flow characters are " + commonHeaders[i].flowChars + ".\n");    }
428                 if (commonHeaders[i].keySequence != keySeq)             { okay = false;    m->mothurOut("[ERROR]: merge issue with common headers. Key sequences do not match. " + filenames[0] + " Key sequence is " + commonHeaders[0].keySequence + ", but " + filenames[i] + " key sequence is " + commonHeaders[i].keySequence + ".\n");     }
429             }
430         }else { m->control_pressed = true; return 0; } //should never get here
431         
432         if (!okay) { m->control_pressed = true; return 0; }
433         
434         string endian = m->findEdianness();
435         char* mybuffer = new char[4];
436         ifstream in;
437         m->openInputFileBinary(currentFileName, in);
438         
439         //magic number
440         in.read(mybuffer,4);
441         ofstream out;
442         m->openOutputFileBinaryAppend(outputFileHeader, out);
443         out.write(mybuffer, in.gcount());
444         delete[] mybuffer;
445         
446         //version
447         mybuffer = new char[4];
448         in.read(mybuffer,4);
449         out.write(mybuffer, in.gcount());
450         delete[] mybuffer;
451         
452         //offset
453         mybuffer = new char[8];
454         in.read(mybuffer,8);
455         unsigned long long offset = 0;
456         char* thisbuffer = new char[8];
457         thisbuffer[0] = (offset >> 56) & 0xFF;
458         thisbuffer[1] = (offset >> 48) & 0xFF;
459         thisbuffer[2] = (offset >> 40) & 0xFF;
460         thisbuffer[3] = (offset >> 32) & 0xFF;
461         thisbuffer[4] = (offset >> 24) & 0xFF;
462         thisbuffer[5] = (offset >> 16) & 0xFF;
463         thisbuffer[6] = (offset >> 8) & 0xFF;
464         thisbuffer[7] = offset & 0xFF;
465         out.write(thisbuffer, 8);
466         delete[] thisbuffer;
467         delete[] mybuffer;
468         
469         //read index length
470                 mybuffer = new char[4];
471         in.read(mybuffer,4);
472         offset = 0;
473         char* thisbuffer2 = new char[4];
474         thisbuffer2[0] = (offset >> 24) & 0xFF;
475         thisbuffer2[1] = (offset >> 16) & 0xFF;
476         thisbuffer2[2] = (offset >> 8) & 0xFF;
477         thisbuffer2[3] = offset & 0xFF;
478         out.write(thisbuffer2, 4);
479         delete[] thisbuffer2;
480         delete[] mybuffer;
481                 
482         //change num reads
483         mybuffer = new char[4];
484         in.read(mybuffer,4);
485         delete[] mybuffer;
486         thisbuffer2 = new char[4];
487         if (endian == "BIG_ENDIAN") {
488             thisbuffer2[0] = (numTotalReads >> 24) & 0xFF;
489             thisbuffer2[1] = (numTotalReads >> 16) & 0xFF;
490             thisbuffer2[2] = (numTotalReads >> 8) & 0xFF;
491             thisbuffer2[3] = numTotalReads & 0xFF;
492         }else {
493             thisbuffer2[0] = numTotalReads & 0xFF;
494             thisbuffer2[1] = (numTotalReads >> 8) & 0xFF;
495             thisbuffer2[2] = (numTotalReads >> 16) & 0xFF;
496             thisbuffer2[3] = (numTotalReads >> 24) & 0xFF;
497         }
498         out.write(thisbuffer2, 4);
499         delete[] thisbuffer2;
500         
501         
502         //read header length
503         mybuffer = new char[2];
504         in.read(mybuffer,2);
505         out.write(mybuffer, in.gcount());
506         delete[] mybuffer;
507         
508         //read key length
509         mybuffer = new char[2];
510         in.read(mybuffer,2);
511         out.write(mybuffer, in.gcount());
512         delete[] mybuffer;
513         
514         //read number of flow reads
515         mybuffer = new char[2];
516         in.read(mybuffer,2);
517         out.write(mybuffer, in.gcount());
518         delete[] mybuffer;
519         
520         //read format code
521         mybuffer = new char[1];
522         in.read(mybuffer,1);
523         out.write(mybuffer, in.gcount());
524         delete[] mybuffer;
525         
526         //read flow chars
527         mybuffer = new char[commonHeaders[0].numFlowsPerRead];
528         in.read(mybuffer,commonHeaders[0].numFlowsPerRead);
529         out.write(mybuffer, in.gcount());
530         delete[] mybuffer;
531         
532         //read key
533         mybuffer = new char[commonHeaders[0].keyLength];
534         in.read(mybuffer,commonHeaders[0].keyLength);
535         out.write(mybuffer, in.gcount());
536         delete[] mybuffer;
537         
538         /* Pad to 8 chars */
539         unsigned long long spotInFile = in.tellg();
540         unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
541         in.seekg(spot);
542         
543         mybuffer = new char[spot-spotInFile];
544         out.write(mybuffer, spot-spotInFile);
545         delete[] mybuffer;
546         in.close();
547         out.close();
548         
549         m->appendBinaryFiles(outputFile, outputFileHeader);
550         m->renameFile(outputFileHeader, outputFile);
551         m->mothurRemove(outputFileHeader);
552         
553                 return 0;
554         
555         }
556         catch(exception& e) {
557                 m->errorOut(e, "MergeSfffilesCommand", "adjustCommonHeader");
558                 exit(1);
559         }
560 }
561 //**********************************************************************************************************************
562 bool MergeSfffilesCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, ofstream& out){
563         try {
564         unsigned long long startSpotInFile = in.tellg();
565                 if (!in.eof()) {
566             
567             /*****************************************/
568             //read header
569             
570             //read header length
571                         char buffer [2];
572                         in.read(buffer, 2);
573                         header.headerLength = be_int2(*(unsigned short *)(&buffer));
574             
575                         //read name length
576                         char buffer2 [2];
577                         in.read(buffer2, 2);
578                         header.nameLength = be_int2(*(unsigned short *)(&buffer2));
579             
580                         //read num bases
581                         char buffer3 [4];
582                         in.read(buffer3, 4);
583                         header.numBases =  be_int4(*(unsigned int *)(&buffer3));
584             
585                         
586                         //read clip qual left
587                         char buffer4 [2];
588                         in.read(buffer4, 2);
589                         header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
590                         header.clipQualLeft = 5;
591             
592                         
593                         //read clip qual right
594                         char buffer5 [2];
595                         in.read(buffer5, 2);
596                         header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
597             
598             
599                         //read clipAdapterLeft
600                         char buffer6 [2];
601                         in.read(buffer6, 2);
602                         header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
603             
604             
605                         //read clipAdapterRight
606                         char buffer7 [2];
607                         in.read(buffer7, 2);
608                         header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
609             
610             
611                         //read name
612                         char* tempBuffer = new char[header.nameLength];
613                         in.read(&(*tempBuffer), header.nameLength);
614                         header.name = tempBuffer;
615                         if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
616             
617                         delete[] tempBuffer;
618                                                 
619                         /* Pad to 8 chars */
620                         unsigned long long spotInFile = in.tellg();
621                         unsigned long long spot = (spotInFile + 7)& ~7;
622                         in.seekg(spot);
623             
624             /*****************************************/
625             //sequence read
626             
627                         //read flowgram
628                         read.flowgram.resize(numFlowReads);
629                         for (int i = 0; i < numFlowReads; i++) {
630                                 char buffer [2];
631                                 in.read(buffer, 2);
632                                 read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
633                         }
634             
635                         //read flowIndex
636                         read.flowIndex.resize(header.numBases);
637                         for (int i = 0; i < header.numBases; i++) {
638                                 char temp[1];
639                                 in.read(temp, 1);
640                                 read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
641                         }
642             
643                         //read bases
644                         char* tempBuffer6 = new char[header.numBases];
645                         in.read(&(*tempBuffer6), header.numBases);
646                         read.bases = tempBuffer6;
647                         if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }
648                         delete[] tempBuffer6;
649             
650                         //read qual scores
651                         read.qualScores.resize(header.numBases);
652                         for (int i = 0; i < header.numBases; i++) {
653                                 char temp[1];
654                                 in.read(temp, 1);
655                                 read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
656                         }
657             
658                         /* Pad to 8 chars */
659                         spotInFile = in.tellg();
660                         spot = (spotInFile + 7)& ~7;
661                         in.seekg(spot);
662             
663             char * mybuffer;
664             mybuffer = new char [spot-startSpotInFile];
665             
666             ifstream in2;
667             m->openInputFileBinary(currentFileName, in2);
668             in2.seekg(startSpotInFile);
669             in2.read(mybuffer,spot-startSpotInFile);
670             
671             out.write(mybuffer, in2.gcount());
672             numTotalReads++;
673             
674             delete[] mybuffer;
675             in2.close();
676             
677                 }else{
678                         m->mothurOut("Error reading."); m->mothurOutEndLine();
679                 }
680         
681         if (in.eof()) {  return true; }
682         
683                 return false;
684         }
685         catch(exception& e) {
686                 m->errorOut(e, "MergeSfffilesCommand", "readSeqData");
687                 exit(1);
688         }
689 }
690 //**********************************************************************************************************************
691 bool MergeSfffilesCommand::sanityCheck(Header& header, seqRead& read) {
692         try {
693         bool okay = true;
694         string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";
695         
696         if (header.clipQualLeft > read.bases.length()) {
697             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
698         }
699         if (header.clipQualRight > read.bases.length()) {
700             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
701         }
702         if (header.clipQualLeft > read.qualScores.size()) {
703             okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
704         }
705         if (header.clipQualRight > read.qualScores.size()) {
706             okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
707         }
708         
709         if (okay == false) {
710             m->mothurOut(message); m->mothurOutEndLine();
711         }
712         
713                 return okay;
714         }
715         catch(exception& e) {
716                 m->errorOut(e, "MergeSfffilesCommand", "sanityCheck");
717                 exit(1);
718         }
719 }
720 //**********************************************************************************************************************
721 int MergeSfffilesCommand::readFile(){
722         try {
723         
724         string filename;
725         
726         ifstream in;
727         m->openInputFile(file, in);
728         
729         while(!in.eof()) {
730             
731             if (m->control_pressed) { return 0; }
732             
733             in >> filename; m->gobble(in);
734             
735             if (m->debug) { m->mothurOut("[DEBUG]: filename = " + filename + ".\n"); }
736             
737             //check to make sure both are able to be opened
738             ifstream in2;
739             int openForward = m->openInputFile(filename, in2, "noerror");
740             
741             //if you can't open it, try default location
742             if (openForward == 1) {
743                 if (m->getDefaultPath() != "") { //default path is set
744                     string tryPath = m->getDefaultPath() + m->getSimpleName(filename);
745                     m->mothurOut("Unable to open " + filename + ". Trying default " + tryPath); m->mothurOutEndLine();
746                     ifstream in3;
747                     openForward = m->openInputFile(tryPath, in3, "noerror");
748                     in3.close();
749                     filename = tryPath;
750                 }
751             }
752             
753             //if you can't open it, try output location
754             if (openForward == 1) {
755                 if (m->getOutputDir() != "") { //default path is set
756                     string tryPath = m->getOutputDir() + m->getSimpleName(filename);
757                     m->mothurOut("Unable to open " + filename + ". Trying output directory " + tryPath); m->mothurOutEndLine();
758                     ifstream in4;
759                     openForward = m->openInputFile(tryPath, in4, "noerror");
760                     filename = tryPath;
761                     in4.close();
762                 }
763             }
764             
765             if (openForward == 1) { //can't find it
766                 m->mothurOut("[WARNING]: can't find " + filename + ", ignoring.\n");
767             }else{  filenames.push_back(filename); }
768             
769         }
770         in.close();
771         
772         return 0;
773     }
774     catch(exception& e) {
775         m->errorOut(e, "MergeSfffilesCommand", "readFileNames");
776         exit(1);
777     }
778 }
779 //**********************************************************************************************************************
780
781
782
783