]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
16                 CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
17                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
18                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
19                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
20                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
21                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "MakeContigsCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string MakeContigsCommand::getHelpString(){     
36         try {
37                 string helpString = "";
38                 helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
39                 helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
40                 helpString += "The ffastq and rfastq parameter is required.\n";
41                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
42                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
43                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
44                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
45                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
46         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
47         helpString += "The make.contigs command should be in the following format: \n";
48                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
49                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 MakeContigsCommand::MakeContigsCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["qfile"] = tempOutNames;
66         outputTypes["mismatch"] = tempOutNames;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74 MakeContigsCommand::MakeContigsCommand(string option)  {
75         try {
76                 abort = false; calledHelp = false;   
77         
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81                 
82                 else {
83                         vector<string> myArray = setParameters();
84                         
85                         OptionParser parser(option);
86                         map<string, string> parameters = parser.getParameters(); 
87                         
88                         ValidParameters validParameter("pairwise.seqs");
89                         map<string, string>::iterator it;
90                         
91                         //check to make sure all parameters are valid for command
92                         for (it = parameters.begin(); it != parameters.end(); it++) { 
93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
94                         }
95                         
96                         //initialize outputTypes
97                         vector<string> tempOutNames;
98                         outputTypes["fasta"] = tempOutNames;
99                         outputTypes["qfile"] = tempOutNames;
100             outputTypes["mismatch"] = tempOutNames;
101                         
102             
103                         //if the user changes the input directory command factory will send this info to us in the output parameter 
104                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
105                         if (inputDir == "not found"){   inputDir = "";          }
106                         else { 
107                                 string path;
108                 it = parameters.find("ffastq");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
114                                 }
115                 
116                 it = parameters.find("rfastq");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
122                                 }
123             }
124             
125             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
126                         if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
127                         else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
128                         
129                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
130                         if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
131                         else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
132             
133             //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
135                         
136
137                         //check for optional parameter and set defaults
138                         // ...at some point should added some additional type checking...
139                         string temp;
140                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
141                         m->mothurConvert(temp, match);  
142                         
143                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
144                         m->mothurConvert(temp, misMatch);  
145             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
146                         
147                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
148                         m->mothurConvert(temp, gapOpen);  
149             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
150                         
151                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
152                         m->mothurConvert(temp, gapExtend); 
153             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
154                         
155                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
156                         m->setProcessors(temp);
157                         m->mothurConvert(temp, processors);
158                         
159                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
160                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
161         }
162                 
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
166                 exit(1);
167         }
168 }
169 //**********************************************************************************************************************
170 int MakeContigsCommand::execute(){
171         try {
172                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
173         
174         //read ffastq and rfastq files creating fasta and qual files.
175         //this function will create a forward and reverse, fasta and qual files for each processor.
176         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
177         int numReads = 0;
178         int start = time(NULL);
179         longestBase = 1000;
180         m->mothurOut("Reading fastq data...\n"); 
181         vector< vector<string> > files = readFastqFiles(numReads);  
182         m->mothurOut("Done.\n");
183     
184         if (m->control_pressed) { return 0; }
185         
186         string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta";
187         string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual";
188         string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches";
189         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
190         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
191         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
192         
193         m->mothurOut("Making contigs...\n"); 
194         createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
195         m->mothurOut("Done.\n");
196         
197         //remove temp fasta and qual files
198         for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
199         
200         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
201         
202         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
203         
204         string currentFasta = "";
205                 itTypes = outputTypes.find("fasta");
206                 if (itTypes != outputTypes.end()) {
207                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
208                 }
209         
210         string currentQual = "";
211                 itTypes = outputTypes.find("qfile");
212                 if (itTypes != outputTypes.end()) {
213                         if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
214                 }
215                 
216         //output files created by command
217                 m->mothurOutEndLine();
218                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
219                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
220                 m->mothurOutEndLine();
221
222         
223         return 0;
224     }
225         catch(exception& e) {
226                 m->errorOut(e, "MakeContigsCommand", "execute");
227                 exit(1);
228         }
229 }
230 //**********************************************************************************************************************
231 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
232         try {
233                 int num = 0;
234                 vector<int> processIDS;
235 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
236                 int process = 0;
237                 
238                 //loop through and create all the processes you want
239                 while (process != processors-1) {
240                         int pid = fork();
241                         
242                         if (pid > 0) {
243                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
244                                 process++;
245                         }else if (pid == 0){
246                                 num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
247                                 
248                                 //pass numSeqs to parent
249                                 ofstream out;
250                                 string tempFile = outputFasta + toString(getpid()) + ".num.temp";
251                                 m->openOutputFile(tempFile, out);
252                                 out << num << endl;
253                                 out.close();
254                                 
255                                 exit(0);
256                         }else { 
257                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
258                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
259                                 exit(0);
260                         }
261                 }
262                 
263                 //do my part
264                 num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
265                 
266                 //force parent to wait until all the processes are done
267                 for (int i=0;i<processIDS.size();i++) { 
268                         int temp = processIDS[i];
269                         wait(&temp);
270                 }
271         
272                 for (int i = 0; i < processIDS.size(); i++) {
273                         ifstream in;
274                         string tempFile =  outputFasta + toString(processIDS[i]) + ".num.temp";
275                         m->openInputFile(tempFile, in);
276                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
277                         in.close(); m->mothurRemove(tempFile);
278         }
279     #else
280         
281         //////////////////////////////////////////////////////////////////////////////////////////////////////
282                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
283                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
284                 //////////////////////////////////////////////////////////////////////////////////////////////////////
285                 
286                 vector<contigsData*> pDataArray; 
287                 DWORD   dwThreadIdArray[processors-1];
288                 HANDLE  hThreadArray[processors-1]; 
289                 
290                 //Create processor worker threads.
291                 for( int i=0; i<processors-1; i++ ){
292                         string extension = toString(i) + ".temp";
293                         
294                         contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, i);
295                         pDataArray.push_back(tempcontig);
296                         processIDS.push_back(i);
297             
298                         hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
299                 }
300                 
301         num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);           
302         
303                 //Wait until all threads have terminated.
304                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
305                 
306                 //Close all thread handles and free memory allocations.
307                 for(int i=0; i < pDataArray.size(); i++){
308                         num += pDataArray[i]->count;
309             CloseHandle(hThreadArray[i]);
310                         delete pDataArray[i];
311         }
312                                 
313     #endif      
314         
315         for (int i = 0; i < processIDS.size(); i++) {
316                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
317                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
318                         
319                         m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
320                         m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
321             
322             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
323                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
324                 }
325                 
326                 return num;
327         }
328         catch(exception& e) {
329                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
330                 exit(1);
331         }
332 }
333 //**********************************************************************************************************************
334 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
335     try {
336         
337         Alignment* alignment;
338         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
339                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
340         
341         int num = 0;
342         string thisffastafile = files[0];
343         string thisfqualfile = files[1];
344         string thisrfastafile = files[2];
345         string thisrqualfile = files[3];
346         
347         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
348         
349         ifstream inFFasta, inRFasta, inFQual, inRQual;
350         m->openInputFile(thisffastafile, inFFasta);
351         m->openInputFile(thisfqualfile, inFQual);
352         m->openInputFile(thisrfastafile, inRFasta);
353         m->openInputFile(thisrqualfile, inRQual);
354         
355         ofstream outFasta, outQual, outMisMatch;
356         m->openOutputFile(outputFasta, outFasta);
357         m->openOutputFile(outputQual, outQual);
358         m->openOutputFile(outputMisMatches, outMisMatch);
359         outMisMatch << "Name\tLength\tMisMatches\n";
360         
361         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
362             
363             if (m->control_pressed) { break; }
364             
365             //read seqs and quality info
366             Sequence fSeq(inFFasta); m->gobble(inFFasta);
367             Sequence rSeq(inRFasta); m->gobble(inRFasta);
368             QualityScores fQual(inFQual); m->gobble(inFQual);
369             QualityScores rQual(inRQual); m->gobble(inRQual);
370             
371             //flip the reverse reads
372             rSeq.reverseComplement();
373             rQual.flipQScores();
374             
375             //pairwise align
376             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
377             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
378             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
379             fSeq.setAligned(alignment->getSeqAAln());
380             rSeq.setAligned(alignment->getSeqBAln());
381             int length = fSeq.getAligned().length();
382         
383             //traverse alignments merging into one contiguous seq
384             string contig = "";
385             vector<int> contigScores; 
386             int numMismatches = 0;
387             string seq1 = fSeq.getAligned();
388             string seq2 = rSeq.getAligned();
389            
390             vector<int> scores1 = fQual.getQualityScores();
391             vector<int> scores2 = rQual.getQualityScores();
392             
393             for (int i = 0; i < length; i++) {
394                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
395                     contig += seq1[i];
396                     contigScores.push_back(scores1[ABaseMap[i]]);
397                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
398                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2
399                     contig += seq2[i];
400                     contigScores.push_back(scores2[BBaseMap[i]]);
401                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1
402                     contig += seq1[i];
403                     contigScores.push_back(scores1[ABaseMap[i]]);
404                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
405                     char c = seq1[i];
406                     contigScores.push_back(scores1[ABaseMap[i]]);
407                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
408                     contig += c;
409                     numMismatches++;
410                 }else { //should never get here
411                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
412                 }
413             }
414             
415             //output
416             outFasta << ">" << fSeq.getName() << endl << contig << endl;
417             outQual << ">" << fSeq.getName() << endl;
418             for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
419             outQual << endl;
420             outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
421             
422             num++;
423             
424                         //report progress
425                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
426                 }
427         
428                 //report progress
429                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
430         
431         inFFasta.close();
432         inFQual.close();
433         inRFasta.close();
434         inRQual.close();
435         outFasta.close();
436         outQual.close();
437         outMisMatch.close();
438         delete alignment;
439         
440         if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);  m->mothurRemove(outputMisMatches);}
441         
442         return num;
443     }
444         catch(exception& e) {
445                 m->errorOut(e, "MakeContigsCommand", "driver");
446                 exit(1);
447         }
448 }
449 //**********************************************************************************************************************
450 vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
451     try {
452         vector< vector<string> > files;
453         
454         //maps processors number to file pointer
455         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
456         map<int, vector<ofstream*> >::iterator it;
457         
458         //create files to write to
459         for (int i = 0; i < processors; i++) {
460             vector<ofstream*> temp;
461             ofstream* outFF = new ofstream;     temp.push_back(outFF);
462             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
463             ofstream* outRF = new ofstream;     temp.push_back(outRF);
464             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
465             tempfiles[i] = temp;
466             
467             vector<string> names;
468             string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
469             string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
470             string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
471             string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
472             names.push_back(ffastafilename); names.push_back(fqualfilename);
473             names.push_back(rfastafilename); names.push_back(rqualfilename);
474             files.push_back(names);
475             
476             m->openOutputFile(ffastafilename, *outFF);
477             m->openOutputFile(rfastafilename, *outRF);
478             m->openOutputFile(fqualfilename, *outFQ);
479             m->openOutputFile(rqualfilename, *outRQ);
480         }
481         
482         if (m->control_pressed) {
483             //close files, delete ofstreams
484             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
485             //remove files
486             for (int i = 0; i < files.size(); i++) {  
487                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
488             }
489         }
490         
491         ifstream inForward;
492         m->openInputFile(ffastqfile, inForward);
493         
494         ifstream inReverse;
495         m->openInputFile(rfastqfile, inReverse);
496         
497         count = 0;
498         while ((!inForward.eof()) && (!inReverse.eof())) {
499             
500             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
501             
502             //get a read from forward and reverse fastq files
503             fastqRead fread = readFastq(inForward);
504             fastqRead rread = readFastq(inReverse);
505             checkReads(fread, rread);
506             
507             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
508             
509             //if the reads are okay write to output files
510             int process = count % processors;
511             
512             *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
513             *(tempfiles[process][1]) << ">" << fread.name << endl;
514             for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
515             *(tempfiles[process][1]) << endl;
516             *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
517             *(tempfiles[process][3]) << ">" << rread.name << endl;
518             for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
519             *(tempfiles[process][3]) << endl;
520             
521             count++;
522             
523             //report progress
524                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
525                         
526                 }
527                 //report progress
528                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
529                 
530         
531         
532         //close files, delete ofstreams
533         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
534         inForward.close();
535         inReverse.close();
536         
537         //adjust for really large processors or really small files
538         if (count < processors) { 
539             for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
540             files.resize(count);
541             processors = count; 
542         }
543         
544         return files;
545     }
546     catch(exception& e) {
547         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
548         exit(1);
549     }
550 }
551 //**********************************************************************************************************************
552 fastqRead MakeContigsCommand::readFastq(ifstream& in){
553     try {
554         fastqRead read;
555         
556         //read sequence name
557         string name = m->getline(in); m->gobble(in);
558         if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
559         else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
560         else { name = name.substr(1); }
561         
562         //read sequence
563         string sequence = m->getline(in); m->gobble(in);
564         if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
565         
566         //read sequence name
567         string name2 = m->getline(in); m->gobble(in);
568         if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
569         else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
570         else { name2 = name2.substr(1);  }
571         
572         //read quality scores
573         string quality = m->getline(in); m->gobble(in);
574         if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
575         
576         //sanity check sequence length and number of quality scores match
577         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
578         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
579         
580         vector<int> qualScores;
581                 int controlChar = int('@');
582                 for (int i = 0; i < quality.length(); i++) { 
583                         int temp = int(quality[i]);
584                         temp -= controlChar;
585                         
586                         qualScores.push_back(temp);
587                 }
588
589         read.name = name;
590         read.sequence = sequence;
591         read.scores = qualScores;
592
593         return read;
594     }
595     catch(exception& e) {
596         m->errorOut(e, "MakeContigsCommand", "readFastq");
597         exit(1);
598     }
599 }
600 //**********************************************************************************************************************
601 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
602     try {
603         bool good = true;
604         
605         //fix names
606         if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
607             forward.name = forward.name.substr(0, forward.name.length()-2);
608             reverse.name = reverse.name.substr(0, reverse.name.length()-2);
609         }else { good = false; m->control_pressed = true; }
610         
611         //do names match
612         if (forward.name != reverse.name) {
613             m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
614             good = false; m->control_pressed = true;
615         }
616         
617         //do sequence lengths match
618         if (forward.sequence.length() != reverse.sequence.length()) {
619             m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
620             good = false; m->control_pressed = true;
621         }
622         
623         //do number of qual scores match 
624         if (forward.scores.size() != reverse.scores.size()) {
625             m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
626             good = false; m->control_pressed = true;
627         }
628
629         return good;
630     }
631     catch(exception& e) {
632         m->errorOut(e, "MakeContigsCommand", "readFastq");
633         exit(1);
634     }
635 }
636 //**********************************************************************************************************************
637
638
639
640