]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
changes while testing 1.26
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
16                 CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
17                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
18                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
19                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
20                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
21         CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "MakeContigsCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string MakeContigsCommand::getHelpString(){     
37         try {
38                 string helpString = "";
39                 helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
40                 helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
41                 helpString += "The ffastq and rfastq parameters are required.\n";
42                 helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
43                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
44                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
45                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
46                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
47         helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
48         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
49         helpString += "The make.contigs command should be in the following format: \n";
50                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
51                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string MakeContigsCommand::getOutputFileNameTag(string type, string inputName=""){      
61         try {
62         string outputFileName = "";
63                 map<string, vector<string> >::iterator it;
64         
65         //is this a type this command creates
66         it = outputTypes.find(type);
67         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
68         else {
69             if (type == "fasta")             {   outputFileName =  "contigs.fasta";         }
70             else if (type == "qfile")        {   outputFileName =  "contigs.qual";          }
71             else if (type == "mismatch")     {   outputFileName =  "contigs.mismatch";      }
72             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
73         }
74         return outputFileName;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "MakeContigsCommand", "getOutputFileNameTag");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 MakeContigsCommand::MakeContigsCommand(){       
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["fasta"] = tempOutNames;
88                 outputTypes["qfile"] = tempOutNames;
89         outputTypes["mismatch"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97 MakeContigsCommand::MakeContigsCommand(string option)  {
98         try {
99                 abort = false; calledHelp = false;   
100         
101                 //allow user to run help
102                 if(option == "help") { help(); abort = true; calledHelp = true; }
103                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104                 
105                 else {
106                         vector<string> myArray = setParameters();
107                         
108                         OptionParser parser(option);
109                         map<string, string> parameters = parser.getParameters(); 
110                         
111                         ValidParameters validParameter("pairwise.seqs");
112                         map<string, string>::iterator it;
113                         
114                         //check to make sure all parameters are valid for command
115                         for (it = parameters.begin(); it != parameters.end(); it++) { 
116                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
117                         }
118                         
119                         //initialize outputTypes
120                         vector<string> tempOutNames;
121                         outputTypes["fasta"] = tempOutNames;
122                         outputTypes["qfile"] = tempOutNames;
123             outputTypes["mismatch"] = tempOutNames;
124                         
125             
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else { 
130                                 string path;
131                 it = parameters.find("ffastq");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
137                                 }
138                 
139                 it = parameters.find("rfastq");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
145                                 }
146             }
147             
148             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
149                         if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
150                         else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
151                         
152                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
153                         if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
154                         else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
155             
156             //if the user changes the output directory command factory will send this info to us in the output parameter 
157                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
158                         
159
160                         //check for optional parameter and set defaults
161                         // ...at some point should added some additional type checking...
162                         string temp;
163                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
164                         m->mothurConvert(temp, match);  
165                         
166                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
167                         m->mothurConvert(temp, misMatch);  
168             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
169                         
170                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
171                         m->mothurConvert(temp, gapOpen);  
172             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
173                         
174                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
175                         m->mothurConvert(temp, gapExtend); 
176             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
177                         
178             temp = validParameter.validFile(parameters, "threshold", false);    if (temp == "not found"){       temp = "40";                    }
179                         m->mothurConvert(temp, threshold); 
180             if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
181
182                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
183                         m->setProcessors(temp);
184                         m->mothurConvert(temp, processors);
185                         
186                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
187                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
188         }
189                 
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
193                 exit(1);
194         }
195 }
196 //**********************************************************************************************************************
197 int MakeContigsCommand::execute(){
198         try {
199                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
200         
201         //read ffastq and rfastq files creating fasta and qual files.
202         //this function will create a forward and reverse, fasta and qual files for each processor.
203         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
204         int numReads = 0;
205         int start = time(NULL);
206         longestBase = 1000;
207         m->mothurOut("Reading fastq data...\n"); 
208         vector< vector<string> > files = readFastqFiles(numReads);  
209         m->mothurOut("Done.\n");
210     
211         if (m->control_pressed) { return 0; }
212         
213         string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("fasta");
214         string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("qfile");
215         string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
216         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
217         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
218         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
219         
220         m->mothurOut("Making contigs...\n"); 
221         createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
222         m->mothurOut("Done.\n");
223         
224         //remove temp fasta and qual files
225         for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
226         
227         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
228         
229         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
230         
231         string currentFasta = "";
232                 itTypes = outputTypes.find("fasta");
233                 if (itTypes != outputTypes.end()) {
234                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
235                 }
236         
237         string currentQual = "";
238                 itTypes = outputTypes.find("qfile");
239                 if (itTypes != outputTypes.end()) {
240                         if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
241                 }
242                 
243         //output files created by command
244                 m->mothurOutEndLine();
245                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
246                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
247                 m->mothurOutEndLine();
248
249         
250         return 0;
251     }
252         catch(exception& e) {
253                 m->errorOut(e, "MakeContigsCommand", "execute");
254                 exit(1);
255         }
256 }
257 //**********************************************************************************************************************
258 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
259         try {
260                 int num = 0;
261                 vector<int> processIDS;
262 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
263                 int process = 0;
264                 
265                 //loop through and create all the processes you want
266                 while (process != processors-1) {
267                         int pid = fork();
268                         
269                         if (pid > 0) {
270                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
271                                 process++;
272                         }else if (pid == 0){
273                                 num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
274                                 
275                                 //pass numSeqs to parent
276                                 ofstream out;
277                                 string tempFile = outputFasta + toString(getpid()) + ".num.temp";
278                                 m->openOutputFile(tempFile, out);
279                                 out << num << endl;
280                                 out.close();
281                                 
282                                 exit(0);
283                         }else { 
284                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
285                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
286                                 exit(0);
287                         }
288                 }
289                 
290                 //do my part
291                 num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
292                 
293                 //force parent to wait until all the processes are done
294                 for (int i=0;i<processIDS.size();i++) { 
295                         int temp = processIDS[i];
296                         wait(&temp);
297                 }
298         
299                 for (int i = 0; i < processIDS.size(); i++) {
300                         ifstream in;
301                         string tempFile =  outputFasta + toString(processIDS[i]) + ".num.temp";
302                         m->openInputFile(tempFile, in);
303                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
304                         in.close(); m->mothurRemove(tempFile);
305         }
306     #else
307         
308         //////////////////////////////////////////////////////////////////////////////////////////////////////
309                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
310                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
311                 //////////////////////////////////////////////////////////////////////////////////////////////////////
312                 
313                 vector<contigsData*> pDataArray; 
314                 DWORD   dwThreadIdArray[processors-1];
315                 HANDLE  hThreadArray[processors-1]; 
316                 
317                 //Create processor worker threads.
318                 for( int i=0; i<processors-1; i++ ){
319                         string extension = toString(i) + ".temp";
320                         
321                         contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, i);
322                         pDataArray.push_back(tempcontig);
323                         processIDS.push_back(i);
324             
325                         hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
326                 }
327                 
328         num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);           
329         
330                 //Wait until all threads have terminated.
331                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
332                 
333                 //Close all thread handles and free memory allocations.
334                 for(int i=0; i < pDataArray.size(); i++){
335                         num += pDataArray[i]->count;
336             CloseHandle(hThreadArray[i]);
337                         delete pDataArray[i];
338         }
339                                 
340     #endif      
341         
342         for (int i = 0; i < processIDS.size(); i++) {
343                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
344                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
345                         
346                         m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
347                         m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
348             
349             m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
350                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
351                 }
352                 
353                 return num;
354         }
355         catch(exception& e) {
356                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
357                 exit(1);
358         }
359 }
360 //**********************************************************************************************************************
361 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
362     try {
363         
364         Alignment* alignment;
365         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
366                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
367         
368         int num = 0;
369         string thisffastafile = files[0];
370         string thisfqualfile = files[1];
371         string thisrfastafile = files[2];
372         string thisrqualfile = files[3];
373         
374         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
375         
376         ifstream inFFasta, inRFasta, inFQual, inRQual;
377         m->openInputFile(thisffastafile, inFFasta);
378         m->openInputFile(thisfqualfile, inFQual);
379         m->openInputFile(thisrfastafile, inRFasta);
380         m->openInputFile(thisrqualfile, inRQual);
381         
382         ofstream outFasta, outQual, outMisMatch;
383         m->openOutputFile(outputFasta, outFasta);
384         m->openOutputFile(outputQual, outQual);
385         m->openOutputFile(outputMisMatches, outMisMatch);
386         outMisMatch << "Name\tLength\tMisMatches\n";
387         
388         while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
389             
390             if (m->control_pressed) { break; }
391             
392             //read seqs and quality info
393             Sequence fSeq(inFFasta); m->gobble(inFFasta);
394             Sequence rSeq(inRFasta); m->gobble(inRFasta);
395             QualityScores fQual(inFQual); m->gobble(inFQual);
396             QualityScores rQual(inRQual); m->gobble(inRQual);
397             
398             //flip the reverse reads
399             rSeq.reverseComplement();
400             rQual.flipQScores();
401             
402             //pairwise align
403             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
404             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
405             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
406             fSeq.setAligned(alignment->getSeqAAln());
407             rSeq.setAligned(alignment->getSeqBAln());
408             int length = fSeq.getAligned().length();
409         
410             //traverse alignments merging into one contiguous seq
411             string contig = "";
412             vector<int> contigScores; 
413             int numMismatches = 0;
414             string seq1 = fSeq.getAligned();
415             string seq2 = rSeq.getAligned();
416             vector<int> scores1 = fQual.getQualityScores();
417             vector<int> scores2 = rQual.getQualityScores();
418
419            // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
420             int overlapStart = fSeq.getStartPos();
421             int seq2Start = rSeq.getStartPos();
422             //bigger of the 2 starting positions is the location of the overlapping start
423             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
424                 overlapStart = seq2Start; 
425                 for (int i = 0; i < overlapStart; i++) {
426                     contig += seq1[i];
427                     contigScores.push_back(scores1[ABaseMap[i]]);
428                 }
429             }else { //seq1 starts later so take from 0 to overlapStart from seq2
430                 for (int i = 0; i < overlapStart; i++) {
431                     contig += seq2[i];
432                     contigScores.push_back(scores2[BBaseMap[i]]);
433                 }
434             }
435             
436             int seq1End = fSeq.getEndPos();
437             int seq2End = rSeq.getEndPos();
438             int overlapEnd = seq1End;
439             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
440             
441             for (int i = overlapStart; i < overlapEnd; i++) {
442                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
443                     contig += seq1[i];
444                     contigScores.push_back(scores1[ABaseMap[i]]);
445                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
446                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
447                     if (scores2[BBaseMap[i]] < threshold) { } //
448                     else {
449                         contig += seq2[i];
450                         contigScores.push_back(scores2[BBaseMap[i]]);
451                     }
452                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
453                     if (scores1[ABaseMap[i]] < threshold) { } //
454                     else {
455                         contig += seq1[i];
456                         contigScores.push_back(scores1[ABaseMap[i]]);
457                     }
458                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
459                     char c = seq1[i];
460                     contigScores.push_back(scores1[ABaseMap[i]]);
461                     if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
462                     contig += c;
463                     numMismatches++;
464                 }else { //should never get here
465                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
466                 }
467             }
468             
469             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
470                 for (int i = overlapEnd; i < length; i++) {
471                     contig += seq2[i];
472                     contigScores.push_back(scores2[BBaseMap[i]]);
473                 }
474             }else { //seq2 ends before seq1 so take from overlap to length from seq1
475                 for (int i = overlapEnd; i < length; i++) {
476                     contig += seq1[i];
477                     contigScores.push_back(scores1[ABaseMap[i]]);
478                 }
479
480             }
481             //if (num < 5) { cout << overlapStart << '\t' << overlapEnd << endl << seq1 << endl << seq2 << endl<< contig << endl; }
482             //output
483             outFasta << ">" << fSeq.getName() << endl << contig << endl;
484             outQual << ">" << fSeq.getName() << endl;
485             for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
486             outQual << endl;
487             outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
488             
489             num++;
490             
491                         //report progress
492                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
493                 }
494         
495                 //report progress
496                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
497         
498         inFFasta.close();
499         inFQual.close();
500         inRFasta.close();
501         inRQual.close();
502         outFasta.close();
503         outQual.close();
504         outMisMatch.close();
505         delete alignment;
506         
507         if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);  m->mothurRemove(outputMisMatches);}
508         
509         return num;
510     }
511         catch(exception& e) {
512                 m->errorOut(e, "MakeContigsCommand", "driver");
513                 exit(1);
514         }
515 }
516 //**********************************************************************************************************************
517 vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
518     try {
519         vector< vector<string> > files;
520         
521         //maps processors number to file pointer
522         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
523         map<int, vector<ofstream*> >::iterator it;
524         
525         //create files to write to
526         for (int i = 0; i < processors; i++) {
527             vector<ofstream*> temp;
528             ofstream* outFF = new ofstream;     temp.push_back(outFF);
529             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
530             ofstream* outRF = new ofstream;     temp.push_back(outRF);
531             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
532             tempfiles[i] = temp;
533             
534             vector<string> names;
535             string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
536             string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
537             string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
538             string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
539             names.push_back(ffastafilename); names.push_back(fqualfilename);
540             names.push_back(rfastafilename); names.push_back(rqualfilename);
541             files.push_back(names);
542             
543             m->openOutputFile(ffastafilename, *outFF);
544             m->openOutputFile(rfastafilename, *outRF);
545             m->openOutputFile(fqualfilename, *outFQ);
546             m->openOutputFile(rqualfilename, *outRQ);
547         }
548         
549         if (m->control_pressed) {
550             //close files, delete ofstreams
551             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
552             //remove files
553             for (int i = 0; i < files.size(); i++) {  
554                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
555             }
556         }
557         
558         ifstream inForward;
559         m->openInputFile(ffastqfile, inForward);
560         
561         ifstream inReverse;
562         m->openInputFile(rfastqfile, inReverse);
563         
564         count = 0;
565         while ((!inForward.eof()) && (!inReverse.eof())) {
566             
567             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
568             
569             //get a read from forward and reverse fastq files
570             fastqRead fread = readFastq(inForward);
571             fastqRead rread = readFastq(inReverse);
572             checkReads(fread, rread);
573             
574             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
575             
576             //if the reads are okay write to output files
577             int process = count % processors;
578             
579             *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
580             *(tempfiles[process][1]) << ">" << fread.name << endl;
581             for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
582             *(tempfiles[process][1]) << endl;
583             *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
584             *(tempfiles[process][3]) << ">" << rread.name << endl;
585             for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
586             *(tempfiles[process][3]) << endl;
587             
588             count++;
589             
590             //report progress
591                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
592                         
593                 }
594                 //report progress
595                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
596                 
597         
598         
599         //close files, delete ofstreams
600         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
601         inForward.close();
602         inReverse.close();
603         
604         //adjust for really large processors or really small files
605         if (count < processors) { 
606             for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
607             files.resize(count);
608             processors = count; 
609         }
610         
611         return files;
612     }
613     catch(exception& e) {
614         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
615         exit(1);
616     }
617 }
618 //**********************************************************************************************************************
619 fastqRead MakeContigsCommand::readFastq(ifstream& in){
620     try {
621         fastqRead read;
622         
623         //read sequence name
624         string name = m->getline(in); m->gobble(in);
625         if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
626         else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
627         else { name = name.substr(1); }
628         
629         //read sequence
630         string sequence = m->getline(in); m->gobble(in);
631         if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
632         
633         //read sequence name
634         string name2 = m->getline(in); m->gobble(in);
635         if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
636         else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
637         else { name2 = name2.substr(1);  }
638         
639         //read quality scores
640         string quality = m->getline(in); m->gobble(in);
641         if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
642         
643         //sanity check sequence length and number of quality scores match
644         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
645         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
646         
647         vector<int> qualScores;
648                 int controlChar = int('@');
649                 for (int i = 0; i < quality.length(); i++) { 
650                         int temp = int(quality[i]);
651                         temp -= controlChar;
652                         
653                         qualScores.push_back(temp);
654                 }
655
656         read.name = name;
657         read.sequence = sequence;
658         read.scores = qualScores;
659
660         return read;
661     }
662     catch(exception& e) {
663         m->errorOut(e, "MakeContigsCommand", "readFastq");
664         exit(1);
665     }
666 }
667 //**********************************************************************************************************************
668 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
669     try {
670         bool good = true;
671         
672         //fix names
673         if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
674             forward.name = forward.name.substr(0, forward.name.length()-2);
675             reverse.name = reverse.name.substr(0, reverse.name.length()-2);
676         }else { good = false; m->control_pressed = true; }
677         
678         //do names match
679         if (forward.name != reverse.name) {
680             m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
681             good = false; m->control_pressed = true;
682         }
683         
684         //do sequence lengths match
685         if (forward.sequence.length() != reverse.sequence.length()) {
686             m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
687             good = false; m->control_pressed = true;
688         }
689         
690         //do number of qual scores match 
691         if (forward.scores.size() != reverse.scores.size()) {
692             m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
693             good = false; m->control_pressed = true;
694         }
695
696         return good;
697     }
698     catch(exception& e) {
699         m->errorOut(e, "MakeContigsCommand", "readFastq");
700         exit(1);
701     }
702 }
703 //**********************************************************************************************************************
704
705
706
707