]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
added debug output classify.seqs. worked on make.contigs command. added large paramet...
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
16                 CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
17                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
18                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
19                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
20                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
21         CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "MakeContigsCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string MakeContigsCommand::getHelpString(){     
37         try {
38                 string helpString = "";
39                 helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
40                 helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
41                 helpString += "The ffastq and rfastq parameter is required.\n";
42                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
43                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
44                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
45                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
46                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
47         helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
48         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
49         helpString += "The make.contigs command should be in the following format: \n";
50                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
51                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61 MakeContigsCommand::MakeContigsCommand(){       
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["fasta"] = tempOutNames;
67                 outputTypes["qfile"] = tempOutNames;
68         outputTypes["mismatch"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 MakeContigsCommand::MakeContigsCommand(string option)  {
77         try {
78                 abort = false; calledHelp = false;   
79         
80                 //allow user to run help
81                 if(option == "help") { help(); abort = true; calledHelp = true; }
82                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83                 
84                 else {
85                         vector<string> myArray = setParameters();
86                         
87                         OptionParser parser(option);
88                         map<string, string> parameters = parser.getParameters(); 
89                         
90                         ValidParameters validParameter("pairwise.seqs");
91                         map<string, string>::iterator it;
92                         
93                         //check to make sure all parameters are valid for command
94                         for (it = parameters.begin(); it != parameters.end(); it++) { 
95                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
96                         }
97                         
98                         //initialize outputTypes
99                         vector<string> tempOutNames;
100                         outputTypes["fasta"] = tempOutNames;
101                         outputTypes["qfile"] = tempOutNames;
102             outputTypes["mismatch"] = tempOutNames;
103                         
104             
105                         //if the user changes the input directory command factory will send this info to us in the output parameter 
106                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
107                         if (inputDir == "not found"){   inputDir = "";          }
108                         else { 
109                                 string path;
110                 it = parameters.find("ffastq");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
116                                 }
117                 
118                 it = parameters.find("rfastq");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
124                                 }
125             }
126             
127             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
128                         if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
129                         else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
130                         
131                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
132                         if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
133                         else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
134             
135             //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
137                         
138
139                         //check for optional parameter and set defaults
140                         // ...at some point should added some additional type checking...
141                         string temp;
142                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
143                         m->mothurConvert(temp, match);  
144                         
145                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
146                         m->mothurConvert(temp, misMatch);  
147             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
148                         
149                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
150                         m->mothurConvert(temp, gapOpen);  
151             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
152                         
153                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
154                         m->mothurConvert(temp, gapExtend); 
155             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
156                         
157             temp = validParameter.validFile(parameters, "threshold", false);    if (temp == "not found"){       temp = "40";                    }
158                         m->mothurConvert(temp, threshold); 
159             if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
160
161                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
162                         m->setProcessors(temp);
163                         m->mothurConvert(temp, processors);
164                         
165                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
166                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
167         }
168                 
169         }
170         catch(exception& e) {
171                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
172                 exit(1);
173         }
174 }
175 //**********************************************************************************************************************
176 int MakeContigsCommand::execute(){
177         try {
178                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
179         
180         //read ffastq and rfastq files creating fasta and qual files.
181         //this function will create a forward and reverse, fasta and qual files for each processor.
182         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
183         int numReads = 0;
184         int start = time(NULL);
185         longestBase = 1000;
186         m->mothurOut("Reading fastq data...\n"); 
187         vector< vector<string> > files = readFastqFiles(numReads);  
188         m->mothurOut("Done.\n");
189     
190         if (m->control_pressed) { return 0; }
191         
192         string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta";
193         string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual";
194         string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches";
195         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
196         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
197         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
198         
199         m->mothurOut("Making contigs...\n"); 
200         createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
201         m->mothurOut("Done.\n");
202         
203         //remove temp fasta and qual files
204         for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
205         
206         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
207         
208         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
209         
210         string currentFasta = "";
211                 itTypes = outputTypes.find("fasta");
212                 if (itTypes != outputTypes.end()) {
213                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
214                 }
215         
216         string currentQual = "";
217                 itTypes = outputTypes.find("qfile");
218                 if (itTypes != outputTypes.end()) {
219                         if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
220                 }
221                 
222         //output files created by command
223                 m->mothurOutEndLine();
224                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
225                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
226                 m->mothurOutEndLine();
227
228         
229         return 0;
230     }
231         catch(exception& e) {
232                 m->errorOut(e, "MakeContigsCommand", "execute");
233                 exit(1);
234         }
235 }
236 //**********************************************************************************************************************
237 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
238         try {
239                 int num = 0;
240                 vector<int> processIDS;
241 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
242                 int process = 0;
243                 
244                 //loop through and create all the processes you want
245                 while (process != processors-1) {
246                         int pid = fork();
247                         
248                         if (pid > 0) {
249                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
250                                 process++;
251                         }else if (pid == 0){
252                                 num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
253                                 
254                                 //pass numSeqs to parent
255                                 ofstream out;
256                                 string tempFile = outputFasta + toString(getpid()) + ".num.temp";
257                                 m->openOutputFile(tempFile, out);
258                                 out << num << endl;
259                                 out.close();
260                                 
261                                 exit(0);
262                         }else { 
263                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
264                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
265                                 exit(0);
266                         }
267                 }
268                 
269                 //do my part
270                 num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
271                 
272                 //force parent to wait until all the processes are done
273                 for (int i=0;i<processIDS.size();i++) { 
274                         int temp = processIDS[i];
275                         wait(&temp);
276                 }
277         
278                 for (int i = 0; i < processIDS.size(); i++) {
279                         ifstream in;
280                         string tempFile =  outputFasta + toString(processIDS[i]) + ".num.temp";
281                         m->openInputFile(tempFile, in);
282                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
283                         in.close(); m->mothurRemove(tempFile);
284         }
285     #else
286         
287         //////////////////////////////////////////////////////////////////////////////////////////////////////
288                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
289                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
290                 //////////////////////////////////////////////////////////////////////////////////////////////////////
291                 
292                 vector<contigsData*> pDataArray; 
293                 DWORD   dwThreadIdArray[processors-1];
294                 HANDLE  hThreadArray[processors-1]; 
295                 
296                 //Create processor worker threads.
297                 for( int i=0; i<processors-1; i++ ){
298                         string extension = toString(i) + ".temp";
299                         
300                         contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, i);
301                         pDataArray.push_back(tempcontig);
302                         processIDS.push_back(i);
303             
304                         hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
305                 }
306                 
307         num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);           
308         
309                 //Wait until all threads have terminated.
310                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
311                 
312                 //Close all thread handles and free memory allocations.
313                 for(int i=0; i < pDataArray.size(); i++){
314                         num += pDataArray[i]->count;
315             CloseHandle(hThreadArray[i]);
316                         delete pDataArray[i];
317         }
318                                 
319     #endif      
320         
321         for (int i = 0; i < processIDS.size(); i++) {
322                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
323                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
324                         
325                         m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
326                         m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
327             
328             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
329                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
330                 }
331                 
332                 return num;
333         }
334         catch(exception& e) {
335                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
336                 exit(1);
337         }
338 }
339 //**********************************************************************************************************************
340 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
341     try {
342         
343         Alignment* alignment;
344         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
345                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
346         
347         int num = 0;
348         string thisffastafile = files[0];
349         string thisfqualfile = files[1];
350         string thisrfastafile = files[2];
351         string thisrqualfile = files[3];
352         
353         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
354         
355         ifstream inFFasta, inRFasta, inFQual, inRQual;
356         m->openInputFile(thisffastafile, inFFasta);
357         m->openInputFile(thisfqualfile, inFQual);
358         m->openInputFile(thisrfastafile, inRFasta);
359         m->openInputFile(thisrqualfile, inRQual);
360         
361         ofstream outFasta, outQual, outMisMatch;
362         m->openOutputFile(outputFasta, outFasta);
363         m->openOutputFile(outputQual, outQual);
364         m->openOutputFile(outputMisMatches, outMisMatch);
365         outMisMatch << "Name\tLength\tMisMatches\n";
366         
367         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
368             
369             if (m->control_pressed) { break; }
370             
371             //read seqs and quality info
372             Sequence fSeq(inFFasta); m->gobble(inFFasta);
373             Sequence rSeq(inRFasta); m->gobble(inRFasta);
374             QualityScores fQual(inFQual); m->gobble(inFQual);
375             QualityScores rQual(inRQual); m->gobble(inRQual);
376             
377             //flip the reverse reads
378             rSeq.reverseComplement();
379             rQual.flipQScores();
380             
381             //pairwise align
382             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
383             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
384             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
385             fSeq.setAligned(alignment->getSeqAAln());
386             rSeq.setAligned(alignment->getSeqBAln());
387             int length = fSeq.getAligned().length();
388         
389             //traverse alignments merging into one contiguous seq
390             string contig = "";
391             vector<int> contigScores; 
392             int numMismatches = 0;
393             string seq1 = fSeq.getAligned();
394             string seq2 = rSeq.getAligned();
395             vector<int> scores1 = fQual.getQualityScores();
396             vector<int> scores2 = rQual.getQualityScores();
397
398            // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
399             int overlapStart = fSeq.getStartPos();
400             int seq2Start = rSeq.getStartPos();
401             //bigger of the 2 starting positions is the location of the overlapping start
402             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
403                 overlapStart = seq2Start; 
404                 for (int i = 0; i < overlapStart; i++) {
405                     contig += seq1[i];
406                     contigScores.push_back(scores1[ABaseMap[i]]);
407                 }
408             }else { //seq1 starts later so take from 0 to overlapStart from seq2
409                 for (int i = 0; i < overlapStart; i++) {
410                     contig += seq2[i];
411                     contigScores.push_back(scores2[BBaseMap[i]]);
412                 }
413             }
414             
415             int seq1End = fSeq.getEndPos();
416             int seq2End = rSeq.getEndPos();
417             int overlapEnd = seq1End;
418             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
419             
420             for (int i = overlapStart; i < overlapEnd; i++) {
421                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
422                     contig += seq1[i];
423                     contigScores.push_back(scores1[ABaseMap[i]]);
424                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
425                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
426                     if (scores2[BBaseMap[i]] < threshold) { } //
427                     else {
428                         contig += seq2[i];
429                         contigScores.push_back(scores2[BBaseMap[i]]);
430                     }
431                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
432                     if (scores1[ABaseMap[i]] < threshold) { } //
433                     else {
434                         contig += seq1[i];
435                         contigScores.push_back(scores1[ABaseMap[i]]);
436                     }
437                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
438                     char c = seq1[i];
439                     contigScores.push_back(scores1[ABaseMap[i]]);
440                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
441                     contig += c;
442                     numMismatches++;
443                 }else { //should never get here
444                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
445                 }
446             }
447             
448             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
449                 for (int i = overlapEnd; i < length; i++) {
450                     contig += seq2[i];
451                     contigScores.push_back(scores2[BBaseMap[i]]);
452                 }
453             }else { //seq2 ends before seq1 so take from overlap to length from seq1
454                 for (int i = overlapEnd; i < length; i++) {
455                     contig += seq1[i];
456                     contigScores.push_back(scores1[ABaseMap[i]]);
457                 }
458
459             }
460             //if (num < 5) { cout << overlapStart << '\t' << overlapEnd << endl << seq1 << endl << seq2 << endl<< contig << endl; }
461             //output
462             outFasta << ">" << fSeq.getName() << endl << contig << endl;
463             outQual << ">" << fSeq.getName() << endl;
464             for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
465             outQual << endl;
466             outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
467             
468             num++;
469             
470                         //report progress
471                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
472                 }
473         
474                 //report progress
475                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
476         
477         inFFasta.close();
478         inFQual.close();
479         inRFasta.close();
480         inRQual.close();
481         outFasta.close();
482         outQual.close();
483         outMisMatch.close();
484         delete alignment;
485         
486         if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);  m->mothurRemove(outputMisMatches);}
487         
488         return num;
489     }
490         catch(exception& e) {
491                 m->errorOut(e, "MakeContigsCommand", "driver");
492                 exit(1);
493         }
494 }
495 //**********************************************************************************************************************
496 vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
497     try {
498         vector< vector<string> > files;
499         
500         //maps processors number to file pointer
501         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
502         map<int, vector<ofstream*> >::iterator it;
503         
504         //create files to write to
505         for (int i = 0; i < processors; i++) {
506             vector<ofstream*> temp;
507             ofstream* outFF = new ofstream;     temp.push_back(outFF);
508             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
509             ofstream* outRF = new ofstream;     temp.push_back(outRF);
510             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
511             tempfiles[i] = temp;
512             
513             vector<string> names;
514             string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
515             string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
516             string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
517             string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
518             names.push_back(ffastafilename); names.push_back(fqualfilename);
519             names.push_back(rfastafilename); names.push_back(rqualfilename);
520             files.push_back(names);
521             
522             m->openOutputFile(ffastafilename, *outFF);
523             m->openOutputFile(rfastafilename, *outRF);
524             m->openOutputFile(fqualfilename, *outFQ);
525             m->openOutputFile(rqualfilename, *outRQ);
526         }
527         
528         if (m->control_pressed) {
529             //close files, delete ofstreams
530             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
531             //remove files
532             for (int i = 0; i < files.size(); i++) {  
533                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
534             }
535         }
536         
537         ifstream inForward;
538         m->openInputFile(ffastqfile, inForward);
539         
540         ifstream inReverse;
541         m->openInputFile(rfastqfile, inReverse);
542         
543         count = 0;
544         while ((!inForward.eof()) && (!inReverse.eof())) {
545             
546             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
547             
548             //get a read from forward and reverse fastq files
549             fastqRead fread = readFastq(inForward);
550             fastqRead rread = readFastq(inReverse);
551             checkReads(fread, rread);
552             
553             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
554             
555             //if the reads are okay write to output files
556             int process = count % processors;
557             
558             *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
559             *(tempfiles[process][1]) << ">" << fread.name << endl;
560             for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
561             *(tempfiles[process][1]) << endl;
562             *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
563             *(tempfiles[process][3]) << ">" << rread.name << endl;
564             for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
565             *(tempfiles[process][3]) << endl;
566             
567             count++;
568             
569             //report progress
570                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
571                         
572                 }
573                 //report progress
574                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
575                 
576         
577         
578         //close files, delete ofstreams
579         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
580         inForward.close();
581         inReverse.close();
582         
583         //adjust for really large processors or really small files
584         if (count < processors) { 
585             for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
586             files.resize(count);
587             processors = count; 
588         }
589         
590         return files;
591     }
592     catch(exception& e) {
593         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
594         exit(1);
595     }
596 }
597 //**********************************************************************************************************************
598 fastqRead MakeContigsCommand::readFastq(ifstream& in){
599     try {
600         fastqRead read;
601         
602         //read sequence name
603         string name = m->getline(in); m->gobble(in);
604         if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
605         else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
606         else { name = name.substr(1); }
607         
608         //read sequence
609         string sequence = m->getline(in); m->gobble(in);
610         if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
611         
612         //read sequence name
613         string name2 = m->getline(in); m->gobble(in);
614         if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
615         else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
616         else { name2 = name2.substr(1);  }
617         
618         //read quality scores
619         string quality = m->getline(in); m->gobble(in);
620         if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
621         
622         //sanity check sequence length and number of quality scores match
623         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
624         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
625         
626         vector<int> qualScores;
627                 int controlChar = int('@');
628                 for (int i = 0; i < quality.length(); i++) { 
629                         int temp = int(quality[i]);
630                         temp -= controlChar;
631                         
632                         qualScores.push_back(temp);
633                 }
634
635         read.name = name;
636         read.sequence = sequence;
637         read.scores = qualScores;
638
639         return read;
640     }
641     catch(exception& e) {
642         m->errorOut(e, "MakeContigsCommand", "readFastq");
643         exit(1);
644     }
645 }
646 //**********************************************************************************************************************
647 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
648     try {
649         bool good = true;
650         
651         //fix names
652         if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
653             forward.name = forward.name.substr(0, forward.name.length()-2);
654             reverse.name = reverse.name.substr(0, reverse.name.length()-2);
655         }else { good = false; m->control_pressed = true; }
656         
657         //do names match
658         if (forward.name != reverse.name) {
659             m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
660             good = false; m->control_pressed = true;
661         }
662         
663         //do sequence lengths match
664         if (forward.sequence.length() != reverse.sequence.length()) {
665             m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
666             good = false; m->control_pressed = true;
667         }
668         
669         //do number of qual scores match 
670         if (forward.scores.size() != reverse.scores.size()) {
671             m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
672             good = false; m->control_pressed = true;
673         }
674
675         return good;
676     }
677     catch(exception& e) {
678         m->errorOut(e, "MakeContigsCommand", "readFastq");
679         exit(1);
680     }
681 }
682 //**********************************************************************************************************************
683
684
685
686