]> git.donarmstrong.com Git - mothur.git/blob - makecontigscommand.cpp
Merge remote-tracking branch 'origin'
[mothur.git] / makecontigscommand.cpp
1 //
2 //  makecontigscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 5/15/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makecontigscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){     
13         try {
14                 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15         CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16         CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17         CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18         CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19         CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20         CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21         CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
23                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
24         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
25
26         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
27         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
28         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
29                 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
30                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
31                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
32                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
33         CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
34         CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
35                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
36         CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
39                 
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "MakeContigsCommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string MakeContigsCommand::getHelpString(){     
51         try {
52                 string helpString = "";
53                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta.  It will also provide new quality files if the fastq or file parameter is used.\n";
54         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
55                 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
56                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
57         helpString += "The file parameter is 2 or 3 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
58         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
59         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
60         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
61                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
62         helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
63         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
64                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
65                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
66         //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
67                 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
68                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
69                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
70         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
71                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
72                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
73         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
74         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
75         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
76         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
77         helpString += "The make.contigs command should be in the following format: \n";
78                 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
79                 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
80                 return helpString;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "MakeContigsCommand", "getHelpString");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 string MakeContigsCommand::getOutputPattern(string type) {
89     try {
90         string pattern = "";
91         
92         if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
93         else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
94         else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
95         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
96         
97         return pattern;
98     }
99     catch(exception& e) {
100         m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
101         exit(1);
102     }
103 }
104 //**********************************************************************************************************************
105 MakeContigsCommand::MakeContigsCommand(){       
106         try {
107                 abort = true; calledHelp = true; 
108                 setParameters();
109                 vector<string> tempOutNames;
110                 outputTypes["fasta"] = tempOutNames;
111         outputTypes["group"] = tempOutNames;
112         outputTypes["report"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
116                 exit(1);
117         }
118 }
119 //**********************************************************************************************************************
120 MakeContigsCommand::MakeContigsCommand(string option)  {
121         try {
122                 abort = false; calledHelp = false;
123         createFileGroup = false; createOligosGroup = false;
124         
125                 //allow user to run help
126                 if(option == "help") { help(); abort = true; calledHelp = true; }
127                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
128                 
129                 else {
130                         vector<string> myArray = setParameters();
131                         
132                         OptionParser parser(option);
133                         map<string, string> parameters = parser.getParameters(); 
134                         
135                         ValidParameters validParameter("pairwise.seqs");
136                         map<string, string>::iterator it;
137                         
138                         //check to make sure all parameters are valid for command
139                         for (it = parameters.begin(); it != parameters.end(); it++) { 
140                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
141                         }
142                         
143                         //initialize outputTypes
144                         vector<string> tempOutNames;
145                         outputTypes["fasta"] = tempOutNames;
146             outputTypes["report"] = tempOutNames;
147             outputTypes["group"] = tempOutNames;
148                         
149             
150                         //if the user changes the input directory command factory will send this info to us in the output parameter 
151                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
152                         if (inputDir == "not found"){   inputDir = "";          }
153                         else { 
154                                 string path;
155                 it = parameters.find("ffastq");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
161                                 }
162                 
163                 it = parameters.find("rfastq");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
169                                 }
170                 
171                 it = parameters.find("ffasta");
172                                 //user has given a template file
173                                 if(it != parameters.end()){ 
174                                         path = m->hasPath(it->second);
175                                         //if the user has not given a path then, add inputdir. else leave path alone.
176                                         if (path == "") {       parameters["ffasta"] = inputDir + it->second;           }
177                                 }
178                 
179                 it = parameters.find("rfasta");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["rfasta"] = inputDir + it->second;           }
185                                 }
186                 
187                 it = parameters.find("fqfile");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["fqfile"] = inputDir + it->second;           }
193                                 }
194                 
195                 it = parameters.find("rqfile");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["rqfile"] = inputDir + it->second;           }
201                                 }
202                 
203                 it = parameters.find("file");
204                                 //user has given a template file
205                                 if(it != parameters.end()){ 
206                                         path = m->hasPath(it->second);
207                                         //if the user has not given a path then, add inputdir. else leave path alone.
208                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
209                                 }
210                 
211                 it = parameters.find("oligos");
212                                 //user has given a template file
213                                 if(it != parameters.end()){ 
214                                         path = m->hasPath(it->second);
215                                         //if the user has not given a path then, add inputdir. else leave path alone.
216                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
217                                 }
218             }
219             
220             ffastqfile = validParameter.validFile(parameters, "ffastq", true);
221                         if (ffastqfile == "not open") {  abort = true; }        
222                         else if (ffastqfile == "not found") { ffastqfile = ""; }
223                         
224                         rfastqfile = validParameter.validFile(parameters, "rfastq", true);
225                         if (rfastqfile == "not open") {  abort = true; }        
226                         else if (rfastqfile == "not found") { rfastqfile = "";  }
227             
228             ffastafile = validParameter.validFile(parameters, "ffasta", true);
229                         if (ffastafile == "not open") {  abort = true; }        
230                         else if (ffastafile == "not found") { ffastafile = ""; }
231                         
232                         rfastafile = validParameter.validFile(parameters, "rfasta", true);
233                         if (rfastafile == "not open") {  abort = true; }        
234                         else if (rfastafile == "not found") { rfastafile = "";  }
235             
236             fqualfile = validParameter.validFile(parameters, "fqfile", true);
237                         if (fqualfile == "not open") {  abort = true; } 
238                         else if (fqualfile == "not found") { fqualfile = ""; }
239                         
240                         rqualfile = validParameter.validFile(parameters, "rqfile", true);
241                         if (rqualfile == "not open") {  abort = true; } 
242                         else if (rqualfile == "not found") { rqualfile = "";  }
243             
244             file = validParameter.validFile(parameters, "file", true);
245                         if (file == "not open") {  abort = true; }      
246                         else if (file == "not found") { file = "";  }
247             
248             //provide at least
249             if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
250             if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
251             if ((ffastqfile != "") && (rfastqfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
252             if ((ffastqfile == "") && (rfastqfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
253             if ((ffastafile != "") && (rfastafile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
254             if ((ffastafile == "") && (rfastafile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
255             if ((fqualfile != "") && (rqualfile == "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
256             if ((fqualfile == "") && (rqualfile != "")) {  abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
257             if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
258                 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
259             }
260             
261             oligosfile = validParameter.validFile(parameters, "oligos", true);
262                         if (oligosfile == "not found")      {   oligosfile = "";        }
263                         else if(oligosfile == "not open")   {   abort = true;       } 
264                         else {   m->setOligosFile(oligosfile);          }
265             
266             //if the user changes the output directory command factory will send this info to us in the output parameter 
267                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
268                  outputDir = ""; 
269             }
270                         
271
272                         //check for optional parameter and set defaults
273                         // ...at some point should added some additional type checking...
274                         string temp;
275                         temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
276                         m->mothurConvert(temp, match);  
277                         
278                         temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
279                         m->mothurConvert(temp, misMatch);  
280             if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
281                         
282                         temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
283                         m->mothurConvert(temp, gapOpen);  
284             if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
285                         
286                         temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
287                         m->mothurConvert(temp, gapExtend); 
288             if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
289                         
290             temp = validParameter.validFile(parameters, "insert", false);       if (temp == "not found"){       temp = "20";                    }
291                         m->mothurConvert(temp, insert); 
292             if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
293
294             temp = validParameter.validFile(parameters, "deltaq", false);       if (temp == "not found"){       temp = "6";                     }
295                         m->mothurConvert(temp, deltaq);
296             
297                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
298                         m->setProcessors(temp);
299                         m->mothurConvert(temp, processors);
300             
301             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
302                         m->mothurConvert(temp, bdiffs);
303                         
304                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
305                         m->mothurConvert(temp, pdiffs);
306             
307   //          temp = validParameter.validFile(parameters, "ldiffs", false);             if (temp == "not found") { temp = "0"; }
308 //                      m->mothurConvert(temp, ldiffs);
309             ldiffs = 0;
310             
311  //           temp = validParameter.validFile(parameters, "sdiffs", false);             if (temp == "not found") { temp = "0"; }
312  //           m->mothurConvert(temp, sdiffs);
313             sdiffs = 0;
314                         
315                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
316                         m->mothurConvert(temp, tdiffs);
317                         
318                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }  //+ ldiffs + sdiffs;
319
320             temp = validParameter.validFile(parameters, "allfiles", false);             if (temp == "not found") { temp = "F"; }
321                         allFiles = m->isTrue(temp);
322             
323             temp = validParameter.validFile(parameters, "trimoverlap", false);          if (temp == "not found") { temp = "F"; }
324                         trimOverlap = m->isTrue(temp);
325                         
326                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
327                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
328             
329             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "illumina1.8+";        }
330             
331             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
332                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
333                                 abort=true;
334                         }
335             
336             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
337             for (int i = -64; i < 65; i++) { 
338                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
339                 convertTable.push_back(temp);
340             }
341         }
342                 
343         }
344         catch(exception& e) {
345                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
346                 exit(1);
347         }
348 }
349 //**********************************************************************************************************************
350 int MakeContigsCommand::execute(){
351         try {
352                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
353         
354         //read ffastq and rfastq files creating fasta and qual files.
355         //this function will create a forward and reverse, fasta and qual files for each processor.
356         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual.  filesToProcess is for each filepair in the file parameter file.  for ffastq and rfastq this will be size 1.
357         unsigned long int numReads = 0;
358         int start = time(NULL);
359         longestBase = 1000;
360         m->mothurOut("Reading fastq data...\n"); 
361         vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
362         m->mothurOut("Done.\n");
363        
364         if (m->control_pressed) { return 0; }
365         
366         map<string, string> cvars;
367         string compOutputDir = outputDir;
368         if (outputDir == "") { compOutputDir = m->hasPath(file); }
369         cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
370         cvars["[tag]"] = "";
371         string compositeGroupFile = getOutputFileName("group",cvars);
372         cvars["[tag]"] = "trim";
373         string compositeFastaFile = getOutputFileName("fasta",cvars);
374         cvars["[tag]"] = "scrap";
375         string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
376         cvars["[tag]"] = "";
377         string compositeMisMatchFile = getOutputFileName("report",cvars);
378         
379         if (filesToProcess.size() > 1) { //clear files for append below
380             ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
381             m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
382             m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
383             m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
384             outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
385             outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
386             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
387         }
388         
389         for (int l = 0; l < filesToProcess.size(); l++) {
390             
391             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
392             
393             vector<vector<string> > fastaFileNames;
394             createOligosGroup = false;
395             string outputGroupFileName;
396             map<string, string> variables; 
397             string thisOutputDir = outputDir;
398             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
399             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
400             variables["[tag]"] = "";
401             if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]);  }
402             if (createOligosGroup || createFileGroup) {
403                 outputGroupFileName = getOutputFileName("group",variables);
404             }
405             
406             //give group in file file precedence
407             if (createFileGroup) {  createOligosGroup = false; }
408             
409             variables["[tag]"] = "trim";
410             string outFastaFile = getOutputFileName("fasta",variables);
411             variables["[tag]"] = "scrap";
412             string outScrapFastaFile = getOutputFileName("fasta",variables);
413             variables["[tag]"] = "";
414             string outMisMatchFile = getOutputFileName("report",variables);
415                         
416             m->mothurOut("Making contigs...\n"); 
417             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
418             m->mothurOut("Done.\n");
419             
420             //remove temp fasta and qual files
421             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
422             
423             if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); }  return 0; }
424             
425             if(allFiles){
426                 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
427                 map<string, string>::iterator it;
428                 set<string> namesToRemove;
429                 for(int i=0;i<fastaFileNames.size();i++){
430                     for(int j=0;j<fastaFileNames[0].size();j++){
431                         if (fastaFileNames[i][j] != "") {
432                             if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
433                                 if(m->isBlank(fastaFileNames[i][j])){
434                                     m->mothurRemove(fastaFileNames[i][j]);
435                                     namesToRemove.insert(fastaFileNames[i][j]);
436                                 }else{  
437                                     it = uniqueFastaNames.find(fastaFileNames[i][j]);
438                                     if (it == uniqueFastaNames.end()) { 
439                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
440                                     }   
441                                 }
442                             }
443                         }
444                     }
445                 }
446                 
447                 //remove names for outputFileNames, just cleans up the output
448                 vector<string> outputNames2;
449                 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
450                 outputNames = outputNames2;
451                 
452                 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
453                     ifstream in;
454                     m->openInputFile(it->first, in);
455                     
456                     ofstream out;
457                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
458                     thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); 
459                     m->openOutputFile(thisGroupName, out);
460                     
461                     while (!in.eof()){
462                         if (m->control_pressed) { break; }
463                         
464                         Sequence currSeq(in); m->gobble(in);
465                         out << currSeq.getName() << '\t' << it->second << endl;  
466                     }
467                     in.close();
468                     out.close();
469                 }
470             }
471             
472             if (createFileGroup || createOligosGroup) {
473                 ofstream outGroup;
474                 m->openOutputFile(outputGroupFileName, outGroup);
475                 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
476                     outGroup << itGroup->first << '\t' << itGroup->second << endl;
477                 }
478                 outGroup.close();
479             }
480             
481             if (filesToProcess.size() > 1) { //merge into large combo files
482                 if (createFileGroup || createOligosGroup) { 
483                     if (l == 0) { 
484                         ofstream outCGroup;
485                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
486                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
487                     }
488                     m->appendFiles(outputGroupFileName, compositeGroupFile);
489                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
490                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
491                 }
492                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
493                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
494                 m->appendFiles(outFastaFile, compositeFastaFile);
495                 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
496                 if (!allFiles) {
497                     m->mothurRemove(outMisMatchFile);
498                     m->mothurRemove(outFastaFile);
499                     m->mothurRemove(outScrapFastaFile);
500                 }else {
501                     outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
502                     outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
503                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
504                 }
505             }else {
506                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
507                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
508                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
509                 if (createFileGroup || createOligosGroup) { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
510             }
511         }
512         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
513         
514         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
515         
516                 //output group counts
517                 m->mothurOutEndLine();
518                 int total = 0;
519                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
520                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
521             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
522                 }
523                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
524                 
525                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
526         
527         string currentFasta = "";
528                 itTypes = outputTypes.find("fasta");
529                 if (itTypes != outputTypes.end()) {
530                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
531                 }
532         
533         string currentGroup = "";
534                 itTypes = outputTypes.find("group");
535                 if (itTypes != outputTypes.end()) {
536                         if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
537                 }
538                 
539         //output files created by command
540                 m->mothurOutEndLine();
541                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
542                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
543                 m->mothurOutEndLine();
544
545         return 0;
546     }
547         catch(exception& e) {
548                 m->errorOut(e, "MakeContigsCommand", "execute");
549                 exit(1);
550         }
551 }
552 //**********************************************************************************************************************
553 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
554         try {
555         vector< vector< vector<string> > > filesToProcess;
556         
557         if (ffastqfile != "") { //reading one file
558             vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile); 
559             //adjust for really large processors or really small files
560             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
561             if (numReads < processors) { 
562                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
563                 files.resize(numReads);
564                 processors = numReads; 
565             }
566             filesToProcess.push_back(files);
567         }else if (file != "") { //reading multiple files
568             //return only valid pairs
569             vector< vector<string> > filePairsToProcess = readFileNames(file);
570             
571             if (m->control_pressed) { return filesToProcess; }
572             
573             if (filePairsToProcess.size() != 0) {
574                 for (int i = 0; i < filePairsToProcess.size(); i++) {
575                     
576                     if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
577                     
578                     unsigned long int thisFilesReads;
579                     vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1]); 
580                     
581                     //adjust for really large processors or really small files
582                     if (thisFilesReads < processors) { 
583                         m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); 
584                         for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
585                     }else {
586                         filesToProcess.push_back(files);
587                         numReads += thisFilesReads;
588                     }
589                 }
590                 //all files are bad
591                 if (numReads == 0) {  m->control_pressed = true; }
592             }
593         }else if (ffastafile != "") {
594             vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
595             //adjust for really large processors or really small files
596             if (numReads == 0) {  m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
597             if (numReads < processors) { 
598                 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
599                 files.resize(numReads);
600                 processors = numReads; 
601             }
602             filesToProcess.push_back(files);
603         }else { m->control_pressed = true; } //should not get here
604         
605         return filesToProcess;
606     }
607         catch(exception& e) {
608                 m->errorOut(e, "MakeContigsCommand", "preProcessData");
609                 exit(1);
610         }
611 }
612 //**********************************************************************************************************************
613 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
614         try {
615                 int num = 0;
616                 vector<int> processIDS;
617         string group = "";
618         map<int, string>::iterator it = file2Group.find(index);
619         if (it != file2Group.end()) { group = it->second; }
620         
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622                 int process = 0;
623                 
624                 //loop through and create all the processes you want
625                 while (process != processors-1) {
626                         int pid = fork();
627                         
628                         if (pid > 0) {
629                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
630                                 process++;
631                         }else if (pid == 0){
632                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
633                 
634                                 if(allFiles){
635                                         ofstream temp;
636                     
637                                         for(int i=0;i<tempFASTAFileNames.size();i++){
638                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
639                                                         if (tempFASTAFileNames[i][j] != "") {
640                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
641                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
642                                                         }
643                                                 }
644                                         }
645                                 }
646
647                                 num = driver(files[process], 
648                              outputFasta + toString(getpid()) + ".temp", 
649                              outputScrapFasta + toString(getpid()) + ".temp", 
650                              outputMisMatches + toString(getpid()) + ".temp",
651                              tempFASTAFileNames, process, group);
652                                 
653                                 //pass groupCounts to parent
654                 ofstream out;
655                 string tempFile = toString(getpid()) + ".num.temp";
656                 m->openOutputFile(tempFile, out);
657                 out << num << endl;
658                                 if (createFileGroup || createOligosGroup) {
659                                         out << groupCounts.size() << endl;
660                                         
661                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
662                                                 out << it->first << '\t' << it->second << endl;
663                                         }
664                     
665                     out << groupMap.size() << endl;
666                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
667                                                 out << it->first << '\t' << it->second << endl;
668                                         }
669                                 }
670                 out.close();
671                                 
672                                 exit(0);
673                         }else { 
674                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
675                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
676                                 exit(0);
677                         }
678                 }
679                 
680         ofstream temp;
681                 m->openOutputFile(outputFasta, temp);           temp.close();
682         m->openOutputFile(outputScrapFasta, temp);              temp.close();
683                 
684                 //do my part
685                 num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1, group);
686                 
687                 //force parent to wait until all the processes are done
688                 for (int i=0;i<processIDS.size();i++) { 
689                         int temp = processIDS[i];
690                         wait(&temp);
691                 }
692         
693                 for (int i = 0; i < processIDS.size(); i++) {
694             ifstream in;
695             string tempFile = toString(processIDS[i]) + ".num.temp";
696             m->openInputFile(tempFile, in);
697             int tempNum;
698             in >> tempNum; num += tempNum; m->gobble(in);
699             
700                         if (createFileGroup || createOligosGroup) {
701                                 string group;
702                                 in >> tempNum; m->gobble(in);
703                                 
704                                 if (tempNum != 0) {
705                                         for (int j = 0; j < tempNum; j++) { 
706                         int groupNum;
707                                                 in >> group >> groupNum; m->gobble(in);
708                         
709                                                 map<string, int>::iterator it = groupCounts.find(group);
710                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
711                                                 else { groupCounts[it->first] += groupNum; }
712                                         }
713                                 }
714                 in >> tempNum; m->gobble(in);
715                 if (tempNum != 0) {
716                                         for (int j = 0; j < tempNum; j++) { 
717                         string group, seqName;
718                                                 in >> seqName >> group; m->gobble(in);
719                         
720                                                 map<string, string>::iterator it = groupMap.find(seqName);
721                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
722                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
723                                         }
724                                 }
725                         }
726             in.close(); m->mothurRemove(tempFile);
727         }
728     #else
729         
730         //////////////////////////////////////////////////////////////////////////////////////////////////////
731                 //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
732                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
733                 //////////////////////////////////////////////////////////////////////////////////////////////////////
734                 
735                 vector<contigsData*> pDataArray; 
736                 DWORD   dwThreadIdArray[processors-1];
737                 HANDLE  hThreadArray[processors-1]; 
738                 
739                 //Create processor worker threads.
740                 for( int h=0; h<processors-1; h++ ){
741                         string extension = "";
742                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
743             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
744                         
745             if(allFiles){
746                 ofstream temp;
747                 
748                 for(int i=0;i<tempFASTAFileNames.size();i++){
749                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
750                         if (tempFASTAFileNames[i][j] != "") {
751                             tempFASTAFileNames[i][j] += extension;
752                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
753                         }
754                     }
755                 }
756             }
757
758                                   
759                         contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
760                         pDataArray.push_back(tempcontig);
761             
762                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
763                 }
764         
765         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
766
767         if(allFiles){
768             ofstream temp;
769             string extension = toString(processors-1) + ".temp";
770             
771             for(int i=0;i<tempFASTAFileNames.size();i++){
772                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
773                     if (tempFASTAFileNames[i][j] != "") {
774                         tempFASTAFileNames[i][j] += extension;
775                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
776                     }
777                 }
778             }
779         }
780
781                 //parent do my part
782                 ofstream temp;
783                 m->openOutputFile(outputFasta, temp);           temp.close();
784         m->openOutputFile(outputScrapFasta, temp);              temp.close();
785         
786         //do my part
787         processIDS.push_back(processors-1);
788                 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
789         
790                 //Wait until all threads have terminated.
791                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
792                 
793                 //Close all thread handles and free memory allocations.
794                 for(int i=0; i < pDataArray.size(); i++){
795                         num += pDataArray[i]->count;
796             if (!pDataArray[i]->done) {
797                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
798             }
799             for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
800                 map<string, int>::iterator it2 = groupCounts.find(it->first);
801                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
802                 else { groupCounts[it->first] += it->second; }
803             }
804             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
805                 map<string, string>::iterator it2 = groupMap.find(it->first);
806                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
807                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
808             }
809             CloseHandle(hThreadArray[i]);
810                         delete pDataArray[i];
811         }
812                                 
813     #endif      
814         
815         for (int i = 0; i < processIDS.size(); i++) {
816                         m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
817                         m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
818                         
819                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
820                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
821             
822             m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
823                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
824             
825             if(allFiles){
826                                 for(int j=0;j<fastaFileNames.size();j++){
827                                         for(int k=0;k<fastaFileNames[j].size();k++){
828                                                 if (fastaFileNames[j][k] != "") {
829                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
830                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
831                                                 }
832                                         }
833                                 }
834                         }
835                 }
836                 
837                 return num;
838         }
839         catch(exception& e) {
840                 m->errorOut(e, "MakeContigsCommand", "createProcesses");
841                 exit(1);
842         }
843 }
844 //**********************************************************************************************************************
845 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
846     try {
847         
848         Alignment* alignment;
849         if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
850                 else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
851         
852         int num = 0;
853         string thisffastafile = files[0];
854         string thisfqualfile = files[1];
855         string thisrfastafile = files[2];
856         string thisrqualfile = files[3];
857         
858         if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
859         
860         ifstream inFFasta, inRFasta, inFQual, inRQual;
861         ofstream outFasta, outMisMatch, outScrapFasta;
862         m->openInputFile(thisffastafile, inFFasta);
863         m->openInputFile(thisrfastafile, inRFasta);
864         if (thisfqualfile != "") {
865             m->openInputFile(thisfqualfile, inFQual);
866             m->openInputFile(thisrqualfile, inRQual);
867         }
868         m->openOutputFile(outputFasta, outFasta);
869         m->openOutputFile(outputScrapFasta, outScrapFasta);
870         m->openOutputFile(outputMisMatches, outMisMatch);
871         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
872         
873         TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
874         
875         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
876             
877             if (m->control_pressed) { break; }
878             
879             int success = 1;
880             string trashCode = "";
881             int currentSeqsDiffs = 0;
882
883             //read seqs and quality info
884             Sequence fSeq(inFFasta); m->gobble(inFFasta);
885             Sequence rSeq(inRFasta); m->gobble(inRFasta);
886             QualityScores* fQual = NULL; QualityScores* rQual = NULL;
887             if (thisfqualfile != "") {
888                 fQual = new QualityScores(inFQual); m->gobble(inFQual);
889                 rQual = new QualityScores(inRQual); m->gobble(inRQual);
890             }
891             
892             int barcodeIndex = 0;
893             int primerIndex = 0;
894             
895             if(barcodes.size() != 0){
896                 if (thisfqualfile != "") {
897                     success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
898                 }else {
899                     success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
900                 }
901                 if(success > bdiffs)            {       trashCode += 'b';       }
902                 else{ currentSeqsDiffs += success;  }
903             }
904             
905             if(primers.size() != 0){
906                 if (thisfqualfile != "") {
907                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
908                 }else {
909                     success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
910                 }
911                 if(success > pdiffs)            {       trashCode += 'f';       }
912                 else{ currentSeqsDiffs += success;  }
913             }
914             
915             if (currentSeqsDiffs > tdiffs)      {       trashCode += 't';   }
916             
917             //flip the reverse reads
918             rSeq.reverseComplement();
919             if (thisfqualfile != "") { rQual->flipQScores(); }
920
921             //pairwise align
922             alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
923             map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
924             map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
925             fSeq.setAligned(alignment->getSeqAAln());
926             rSeq.setAligned(alignment->getSeqBAln());
927             int length = fSeq.getAligned().length();
928             
929             //traverse alignments merging into one contiguous seq
930             string contig = "";
931             int numMismatches = 0;
932             string seq1 = fSeq.getAligned();
933             string seq2 = rSeq.getAligned();
934             vector<int> scores1, scores2; 
935             if (thisfqualfile != "") {
936                 scores1 = fQual->getQualityScores();
937                 scores2 = rQual->getQualityScores();
938                 delete fQual; delete rQual;
939             }
940             
941             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
942             int overlapStart = fSeq.getStartPos();
943             int seq2Start = rSeq.getStartPos();
944             
945             //bigger of the 2 starting positions is the location of the overlapping start
946             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
947                 overlapStart = seq2Start; 
948                 for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
949             }else { //seq1 starts later so take from 0 to overlapStart from seq2
950                 for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
951             }
952             
953             int seq1End = fSeq.getEndPos();
954             int seq2End = rSeq.getEndPos();
955             int overlapEnd = seq1End;
956             if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
957             
958             int oStart = contig.length();
959             for (int i = overlapStart; i < overlapEnd; i++) {
960                 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
961                     contig += seq1[i];
962                 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
963                     if (thisfqualfile != "") {
964                         if (scores2[BBaseMap[i]] <= insert) { } //
965                         else { contig += seq2[i];  }
966                     }else { contig += seq2[i]; } //with no quality info, then we keep it?
967                 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
968                     if (thisfqualfile != "") {
969                         if (scores1[ABaseMap[i]] <= insert) { } //
970                         else { contig += seq1[i];  }
971                     }else { contig += seq1[i]; } //with no quality info, then we keep it?
972                 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
973                     if (thisfqualfile != "") {
974                         if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
975                             char c = seq1[i];
976                             if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
977                             contig += c;
978                         }else { //if no, base becomes n
979                             contig += 'N';
980                         }
981                         numMismatches++;
982                     }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
983                 }else { //should never get here
984                     m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
985                 }
986             }
987             int oend = contig.length();
988             if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
989                 for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
990             }else { //seq2 ends before seq1 so take from overlap to length from seq1
991                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
992             }
993             
994             if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
995             
996             if(trashCode.length() == 0){
997                 bool ignore = false;
998                 
999                 if (m->debug) { m->mothurOut(fSeq.getName()); }
1000                 
1001                 if (createOligosGroup) {
1002                     if(barcodes.size() != 0){
1003                         string thisGroup = barcodeNameVector[barcodeIndex];
1004                         if (primers.size() != 0) { 
1005                             if (primerNameVector[primerIndex] != "") { 
1006                                 if(thisGroup != "") {
1007                                     thisGroup += "." + primerNameVector[primerIndex]; 
1008                                 }else {
1009                                     thisGroup = primerNameVector[primerIndex]; 
1010                                 }
1011                             } 
1012                         }
1013                         
1014                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1015                         
1016                         int pos = thisGroup.find("ignore");
1017                         if (pos == string::npos) {
1018                             groupMap[fSeq.getName()] = thisGroup; 
1019                         
1020                             map<string, int>::iterator it = groupCounts.find(thisGroup);
1021                             if (it == groupCounts.end()) {      groupCounts[thisGroup] = 1; }
1022                             else { groupCounts[it->first] ++; }
1023                         }else { ignore = true; }
1024                         
1025                     }
1026                 }else if (createFileGroup) {
1027                     int pos = group.find("ignore");
1028                     if (pos == string::npos) {
1029                         groupMap[fSeq.getName()] = group;
1030                         
1031                         map<string, int>::iterator it = groupCounts.find(group);
1032                         if (it == groupCounts.end()) {  groupCounts[group] = 1; }
1033                         else { groupCounts[it->first] ++; }
1034                     }else { ignore = true; }
1035                 }
1036                 if (m->debug) { m->mothurOut("\n"); }
1037                 
1038                 if(allFiles && !ignore){
1039                     ofstream output;
1040                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1041                     output << ">" << fSeq.getName() << endl << contig << endl;
1042                     output.close();
1043                 }
1044                 
1045                 //output
1046                 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1047                 int numNs = 0;
1048                 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
1049                 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1050             }else {
1051                 //output
1052                 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1053             }
1054             num++;
1055             
1056                         //report progress
1057                         if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1058                 }
1059         
1060                 //report progress
1061                 if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
1062         
1063         inFFasta.close();
1064         inRFasta.close();
1065         outFasta.close();
1066         outScrapFasta.close();
1067         outMisMatch.close();
1068         if (thisfqualfile != "") {
1069             inFQual.close();
1070             inRQual.close();
1071         }
1072         delete alignment;
1073         
1074         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
1075     
1076         return num;
1077     }
1078         catch(exception& e) {
1079                 m->errorOut(e, "MakeContigsCommand", "driver");
1080                 exit(1);
1081         }
1082 }
1083 //**********************************************************************************************************************
1084 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq){
1085     try {
1086         vector< vector<string> > files;
1087         //maps processors number to file pointer
1088         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1089         map<int, vector<ofstream*> >::iterator it;
1090         
1091         //create files to write to
1092         for (int i = 0; i < processors; i++) {
1093             vector<ofstream*> temp;
1094             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1095             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1096             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1097             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1098             tempfiles[i] = temp;
1099             
1100             vector<string> names;
1101             string thisOutputDir = outputDir;
1102             if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1103             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1104             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1105             string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1106             string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1107             names.push_back(ffastafilename); names.push_back(fqualfilename);
1108             names.push_back(rfastafilename); names.push_back(rqualfilename);
1109             files.push_back(names);
1110             
1111             m->openOutputFile(ffastafilename, *outFF);
1112             m->openOutputFile(rfastafilename, *outRF);
1113             m->openOutputFile(fqualfilename, *outFQ);
1114             m->openOutputFile(rqualfilename, *outRQ);
1115         }
1116         
1117         if (m->control_pressed) {
1118             //close files, delete ofstreams
1119             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1120             //remove files
1121             for (int i = 0; i < files.size(); i++) {  
1122                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1123             }
1124         }
1125         
1126         ifstream inForward;
1127         m->openInputFile(ffastq, inForward);
1128         
1129         ifstream inReverse;
1130         m->openInputFile(rfastq, inReverse);
1131         
1132         count = 0;
1133         map<string, fastqRead> uniques;
1134         map<string, fastqRead>::iterator itUniques;
1135         while ((!inForward.eof()) || (!inReverse.eof())) {
1136             
1137             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
1138             
1139             //get a read from forward and reverse fastq files
1140             bool ignoref, ignorer;
1141             fastqRead thisFread, thisRread;
1142             if (!inForward.eof()) {  thisFread = readFastq(inForward, ignoref); }
1143             else { ignoref = true; }
1144             if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer);  }
1145             else { ignorer = true; }
1146             
1147             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
1148            
1149             for (int i = 0; i < reads.size(); i++) {
1150                 fastqRead fread = reads[i].forward;
1151                 fastqRead rread = reads[i].reverse;
1152                 
1153                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1154                
1155                 //if (checkReads(fread, rread, ffastq, rfastq)) {
1156                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
1157                     
1158                     //if the reads are okay write to output files
1159                     int process = count % processors;
1160                     
1161                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1162                     *(tempfiles[process][1]) << ">" << fread.name << endl;
1163                     for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1164                     *(tempfiles[process][1]) << endl;
1165                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1166                     *(tempfiles[process][3]) << ">" << rread.name << endl;
1167                     for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1168                     *(tempfiles[process][3]) << endl;
1169                     
1170                     count++;
1171                     
1172                     //report progress
1173                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1174                 //}
1175             }
1176                 }
1177                 //report progress
1178                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1179         
1180         if (uniques.size() != 0) {
1181             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1182                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1183             }
1184             m->mothurOutEndLine();
1185         }
1186         
1187         //close files, delete ofstreams
1188         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1189         inForward.close();
1190         inReverse.close();
1191         
1192         return files;
1193     }
1194     catch(exception& e) {
1195         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1196         exit(1);
1197     }
1198 }
1199 //**********************************************************************************************************************
1200 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1201     try {
1202         vector< vector<string> > files;
1203         //maps processors number to file pointer
1204         map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1205         map<int, vector<ofstream*> >::iterator it;
1206         
1207         //create files to write to
1208         for (int i = 0; i < processors; i++) {
1209             vector<ofstream*> temp;
1210             ofstream* outFF = new ofstream;     temp.push_back(outFF);
1211             ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
1212             ofstream* outRF = new ofstream;     temp.push_back(outRF);
1213             ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
1214             tempfiles[i] = temp;
1215             
1216             vector<string> names;
1217             string thisOutputDir = outputDir;
1218             if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1219             string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1220             string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1221             string fqualfilename = "";
1222             if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp";  m->openOutputFile(fqualfilename, *outFQ); }
1223             string rqualfilename = "";
1224             if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1225             names.push_back(ffastafilename); names.push_back(fqualfilename);
1226             names.push_back(rfastafilename); names.push_back(rqualfilename);
1227             files.push_back(names);
1228             
1229             m->openOutputFile(ffastafilename, *outFF);
1230             m->openOutputFile(rfastafilename, *outRF);
1231         }
1232         
1233         if (m->control_pressed) {
1234             //close files, delete ofstreams
1235             for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1236             //remove files
1237             for (int i = 0; i < files.size(); i++) {  
1238                 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1239             }
1240         }
1241         
1242         ifstream inForwardFasta;
1243         m->openInputFile(ffasta, inForwardFasta);
1244         
1245         ifstream inReverseFasta;
1246         m->openInputFile(rfasta, inReverseFasta);
1247         
1248         ifstream inForwardQual, inReverseQual;
1249         if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1250         
1251         count = 0;
1252         map<string, fastqRead> uniques;
1253         map<string, fastqRead>::iterator itUniques;
1254         while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1255             
1256             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1257             
1258             //get a reads from forward and reverse fasta files
1259             bool ignoref, ignorer;
1260             fastqRead thisFread, thisRread;
1261             if (!inForwardFasta.eof()) {  
1262                 ignoref = false; 
1263                 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1264                 thisFread.name = temp.getName();
1265                 thisFread.sequence = temp.getUnaligned();
1266             }else { ignoref = true; }
1267             if (!inReverseFasta.eof()) {  
1268                 ignorer = false; 
1269                 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1270                 thisRread.name = temp.getName();
1271                 thisRread.sequence = temp.getUnaligned();  
1272             }else { ignorer = true; }
1273             
1274             //get qual reads if given
1275             if (fqualfile != "") {
1276                 if (!inForwardQual.eof() && !ignoref) {  
1277                     QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1278                     //if forward files dont match ignore read
1279                     if (thisFread.name != temp.getName()) { ignoref = true; } 
1280                     else { thisFread.scores = temp.getQualityScores(); }
1281                 }else { ignoref = true; }
1282                 if (!inReverseQual.eof() && !ignorer) {  
1283                     QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1284                     //if reverse files dont match ignore read
1285                     if (thisRread.name != temp.getName()) { ignorer = true; } 
1286                     else { thisRread.scores = temp.getQualityScores(); }
1287                 }else { ignorer = true; }
1288             }
1289             
1290             vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
1291             
1292             for (int i = 0; i < reads.size(); i++) {
1293                 fastqRead fread = reads[i].forward;
1294                 fastqRead rread = reads[i].reverse;
1295                 
1296                 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1297                 
1298                // if (checkReads(fread, rread, ffasta, rfasta)) {
1299                     if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1300                     
1301                     //if the reads are okay write to output files
1302                     int process = count % processors;
1303                     
1304                     *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1305                     *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1306                     if (fqualfile != "") { //if you have quality info, print it
1307                         *(tempfiles[process][1]) << ">" << fread.name << endl;
1308                         for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1309                         *(tempfiles[process][1]) << endl;
1310                         *(tempfiles[process][3]) << ">" << rread.name << endl;
1311                         for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1312                         *(tempfiles[process][3]) << endl;
1313                     }
1314                     count++;
1315                     
1316                     //report progress
1317                     if((count) % 10000 == 0){   m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1318                 //}
1319             }
1320                 }
1321                 //report progress
1322                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1323         
1324         if (uniques.size() != 0) {
1325             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1326                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1327             }
1328             m->mothurOutEndLine();
1329         }
1330         
1331         //close files, delete ofstreams
1332         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
1333         inReverseFasta.close(); 
1334         inForwardFasta.close(); 
1335         if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1336         
1337         return files;
1338     }
1339     catch(exception& e) {
1340         m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1341         exit(1);
1342     }
1343 }
1344 //**********************************************************************************************************************
1345 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques){
1346     try {
1347         vector<pairFastqRead> reads;
1348         map<string, fastqRead>::iterator itUniques;
1349             
1350         if (!ignoref && !ignorer) {
1351             if (forward.name == reverse.name) { 
1352                 pairFastqRead temp(forward, reverse);
1353                 reads.push_back(temp);
1354             }else {
1355                 bool match = false;
1356                 //if no match are the names only different by 1 and 2?
1357                 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1358                 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1359                 if (tempFRead == tempRRead) {
1360                     if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1361                         forward.name = tempFRead;
1362                         reverse.name = tempRRead;
1363                         pairFastqRead temp(forward, reverse);
1364                         reads.push_back(temp);
1365                         match = true;
1366                     }
1367                 }
1368                 
1369                 if (!match) {
1370                     //look for forward pair
1371                     itUniques = uniques.find(forward.name);
1372                     if (itUniques != uniques.end()) {  //we have the pair for this read
1373                         pairFastqRead temp(forward, itUniques->second);
1374                         reads.push_back(temp);
1375                         uniques.erase(itUniques);
1376                     }else { //save this read for later
1377                         uniques[forward.name] = forward;
1378                     }
1379                     
1380                     //look for reverse pair
1381                     itUniques = uniques.find(reverse.name);
1382                     if (itUniques != uniques.end()) {  //we have the pair for this read
1383                         pairFastqRead temp(itUniques->second, reverse);
1384                         reads.push_back(temp);
1385                         uniques.erase(itUniques);
1386                     }else { //save this read for later
1387                         uniques[reverse.name] = reverse;
1388                     }
1389                 }
1390                                 
1391             }
1392         }else if (!ignoref && ignorer) { //ignore reverse keep forward
1393             //look for forward pair
1394             itUniques = uniques.find(forward.name);
1395             if (itUniques != uniques.end()) {  //we have the pair for this read
1396                 pairFastqRead temp(forward, itUniques->second);
1397                 reads.push_back(temp);
1398                 uniques.erase(itUniques);
1399             }else { //save this read for later
1400                 uniques[forward.name] = forward;
1401             }
1402
1403         }else if (ignoref && !ignorer) { //ignore forward keep reverse
1404             //look for reverse pair
1405             itUniques = uniques.find(reverse.name);
1406             if (itUniques != uniques.end()) {  //we have the pair for this read
1407                 pairFastqRead temp(itUniques->second, reverse);
1408                 reads.push_back(temp);
1409                 uniques.erase(itUniques);
1410             }else { //save this read for later
1411                 uniques[reverse.name] = reverse;
1412             }
1413         }//else ignore both and do nothing
1414         
1415         return reads;
1416     }
1417     catch(exception& e) {
1418         m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1419         exit(1);
1420     }
1421 }
1422 //**********************************************************************************************************************
1423 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1424     try {
1425         fastqRead read;
1426         
1427         ignore = false;
1428         
1429         //read sequence name
1430         string line = m->getline(in); m->gobble(in);
1431         vector<string> pieces = m->splitWhiteSpace(line);
1432         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
1433         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
1434         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1435         else { name = name.substr(1); }
1436         
1437         //read sequence
1438         string sequence = m->getline(in); m->gobble(in);
1439         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1440         
1441         //read sequence name
1442         line = m->getline(in); m->gobble(in);
1443         pieces = m->splitWhiteSpace(line);
1444         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
1445         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1446         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1447         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1448         
1449         //read quality scores
1450         string quality = m->getline(in); m->gobble(in);
1451         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1452          
1453         //sanity check sequence length and number of quality scores match
1454         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1455         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1456         
1457         vector<int> qualScores = convertQual(quality);
1458         
1459         read.name = name;
1460         read.sequence = sequence;
1461         read.scores = qualScores;
1462
1463         return read;
1464     }
1465     catch(exception& e) {
1466         m->errorOut(e, "MakeContigsCommand", "readFastq");
1467         exit(1);
1468     }
1469 }
1470 /**********************************************************************************************************************
1471 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1472     try {
1473         bool good = true;
1474         
1475         //do sequence lengths match
1476         if (forward.sequence.length() != reverse.sequence.length()) {
1477             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1478             good = false; 
1479         }
1480         
1481         //do number of qual scores match 
1482         if (forward.scores.size() != reverse.scores.size()) {
1483             m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1484             good = false; 
1485         }
1486
1487         return good;
1488     }
1489     catch(exception& e) {
1490         m->errorOut(e, "MakeContigsCommand", "checkReads");
1491         exit(1);
1492     }
1493 }*/
1494 //***************************************************************************************************************
1495 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1496         try {
1497         vector< vector<string> > files;
1498         string forward, reverse;
1499         
1500         ifstream in;
1501         m->openInputFile(filename, in);
1502         
1503         while(!in.eof()) {
1504             
1505             if (m->control_pressed) { return files; }
1506             
1507             in >> forward; m->gobble(in);
1508             in >> reverse;
1509             
1510             string group = "";
1511             while (!in.eof())   { //do we have a group assigned to this pair
1512                 char c = in.get();
1513                 if (c == 10 || c == 13 || c == -1){     break;  }
1514                 else if (c == 32 || c == 9){;} //space or tab
1515                 else {  group += c;  }
1516             }
1517             
1518             if (group != "") {
1519                 //line in file look like: group forward reverse
1520                 string temp = forward;
1521                 forward = reverse;
1522                 reverse = group;
1523                 group = temp;
1524                 createFileGroup = true;
1525             }
1526             m->gobble(in);
1527             
1528             
1529             //check to make sure both are able to be opened
1530             ifstream in2;
1531             int openForward = m->openInputFile(forward, in2, "noerror");
1532             
1533             //if you can't open it, try default location
1534             if (openForward == 1) {
1535                 if (m->getDefaultPath() != "") { //default path is set
1536                     string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1537                     m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1538                     ifstream in3;
1539                     openForward = m->openInputFile(tryPath, in3, "noerror");
1540                     in3.close();
1541                     forward = tryPath;
1542                 }
1543             }
1544             
1545             //if you can't open it, try output location
1546             if (openForward == 1) {
1547                 if (m->getOutputDir() != "") { //default path is set
1548                     string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1549                     m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1550                     ifstream in4;
1551                     openForward = m->openInputFile(tryPath, in4, "noerror");
1552                     forward = tryPath;
1553                     in4.close();
1554                 }
1555             }
1556             
1557             if (openForward == 1) { //can't find it
1558                 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); 
1559             }else{  in2.close();  }
1560             
1561             ifstream in3;
1562             int openReverse = m->openInputFile(reverse, in3, "noerror");
1563             
1564             //if you can't open it, try default location
1565             if (openReverse == 1) {
1566                 if (m->getDefaultPath() != "") { //default path is set
1567                     string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1568                     m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1569                     ifstream in3;
1570                     openReverse = m->openInputFile(tryPath, in3, "noerror");
1571                     in3.close();
1572                     reverse = tryPath;
1573                 }
1574             }
1575             
1576             //if you can't open it, try output location
1577             if (openReverse == 1) {
1578                 if (m->getOutputDir() != "") { //default path is set
1579                     string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1580                     m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1581                     ifstream in4;
1582                     openReverse = m->openInputFile(tryPath, in4, "noerror");
1583                     reverse = tryPath;
1584                     in4.close();
1585                 }
1586             }
1587             
1588             if (openReverse == 1) { //can't find it
1589                 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); 
1590             }else{  in3.close();  }
1591             
1592             if ((openForward != 1) && (openReverse != 1)) { //good pair
1593                 file2Group[files.size()] = group;
1594                 vector<string> pair;
1595                 pair.push_back(forward);
1596                 pair.push_back(reverse);
1597                 files.push_back(pair);
1598             }
1599         }
1600         in.close();
1601         
1602         return files;
1603     }
1604     catch(exception& e) {
1605         m->errorOut(e, "MakeContigsCommand", "checkReads");
1606         exit(1);
1607     }
1608 }
1609 //***************************************************************************************************************
1610 //illumina data requires paired forward and reverse data
1611 //BARCODE   atgcatgc   atgcatgc    groupName 
1612 //PRIMER   atgcatgc   atgcatgc    groupName  
1613 //PRIMER   atgcatgc   atgcatgc  
1614 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1615         try {
1616                 ifstream in;
1617                 m->openInputFile(oligosfile, in);
1618                 
1619                 ofstream test;
1620                 
1621                 string type, foligo, roligo, group;
1622         
1623                 int indexPrimer = 0;
1624                 int indexBarcode = 0;
1625         set<string> uniquePrimers;
1626         set<string> uniqueBarcodes;
1627                 
1628                 while(!in.eof()){
1629             
1630                         in >> type; 
1631     
1632                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1633             
1634                         if(type[0] == '#'){
1635                                 while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
1636                                 m->gobble(in);
1637                         }
1638                         else{
1639                                 m->gobble(in);
1640                                 //make type case insensitive
1641                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1642                                 
1643                                 in >> foligo;
1644                 
1645                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1646                                 
1647                                 for(int i=0;i<foligo.length();i++){
1648                                         foligo[i] = toupper(foligo[i]);
1649                                         if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
1650                                 }
1651                                 
1652                                 if(type == "PRIMER"){
1653                                         m->gobble(in);
1654                                         
1655                     in >> roligo;
1656                     
1657                     for(int i=0;i<roligo.length();i++){
1658                         roligo[i] = toupper(roligo[i]);
1659                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1660                     }
1661                     //roligo = reverseOligo(roligo);
1662                     
1663                     group = "";
1664                     
1665                                         // get rest of line in case there is a primer name
1666                                         while (!in.eof())       {       
1667                                                 char c = in.get(); 
1668                                                 if (c == 10 || c == 13){        break;  }
1669                                                 else if (c == 32 || c == 9){;} //space or tab
1670                                                 else {  group += c;  }
1671                                         } 
1672                     
1673                     oligosPair newPrimer(foligo, roligo);
1674                                         
1675                                         //check for repeat barcodes
1676                     string tempPair = foligo+roligo;
1677                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1678                     else { uniquePrimers.insert(tempPair); }
1679                                         
1680                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1681                     
1682                                         primers[indexPrimer]=newPrimer; indexPrimer++;          
1683                                         primerNameVector.push_back(group);
1684                                 }else if(type == "BARCODE"){
1685                                         m->gobble(in);
1686                                         
1687                     in >> roligo;
1688                     
1689                     for(int i=0;i<roligo.length();i++){
1690                         roligo[i] = toupper(roligo[i]);
1691                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1692                     }
1693                     //roligo = reverseOligo(roligo);
1694                     
1695                     oligosPair newPair(foligo, roligo);
1696                     
1697                     group = "";
1698                     while (!in.eof())   {       
1699                                                 char c = in.get(); 
1700                                                 if (c == 10 || c == 13){        break;  }
1701                                                 else if (c == 32 || c == 9){;} //space or tab
1702                                                 else {  group += c;  }
1703                                         } 
1704                                         
1705                     if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1706                         
1707                     //check for repeat barcodes
1708                     string tempPair = foligo+roligo;
1709                     if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1710                     else { uniqueBarcodes.insert(tempPair); }
1711                         
1712                     barcodes[indexBarcode]=newPair; indexBarcode++;
1713                                         barcodeNameVector.push_back(group);
1714                                 }else if(type == "LINKER"){
1715                                         linker.push_back(foligo);
1716                     m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
1717                                 }else if(type == "SPACER"){
1718                                         spacer.push_back(foligo);
1719                     m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
1720                                 }
1721                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
1722                         }
1723                         m->gobble(in);
1724                 }       
1725                 in.close();
1726                 
1727                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1728                 
1729                 //add in potential combos
1730                 if(barcodeNameVector.size() == 0){
1731             oligosPair temp("", "");
1732                         barcodes[0] = temp;
1733                         barcodeNameVector.push_back("");                        
1734                 }
1735                 
1736                 if(primerNameVector.size() == 0){
1737             oligosPair temp("", "");
1738                         primers[0] = temp;
1739                         primerNameVector.push_back("");                 
1740                 }
1741                 
1742                 fastaFileNames.resize(barcodeNameVector.size());
1743                 for(int i=0;i<fastaFileNames.size();i++){
1744                         fastaFileNames[i].assign(primerNameVector.size(), "");
1745                 }
1746                 
1747                 if(allFiles){
1748                         set<string> uniqueNames; //used to cleanup outputFileNames
1749                         for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1750                                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1751                                         
1752                                         string primerName = primerNameVector[itPrimer->first];
1753                                         string barcodeName = barcodeNameVector[itBar->first];
1754                     
1755                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
1756                                         else {
1757                         string comboGroupName = "";
1758                         string fastaFileName = "";
1759                         string qualFileName = "";
1760                         string nameFileName = "";
1761                         string countFileName = "";
1762                         
1763                         if(primerName == ""){
1764                             comboGroupName = barcodeNameVector[itBar->first];
1765                         }
1766                         else{
1767                             if(barcodeName == ""){
1768                                 comboGroupName = primerNameVector[itPrimer->first];
1769                             }
1770                             else{
1771                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1772                             }
1773                         }
1774                         
1775                         
1776                         ofstream temp;
1777                         fastaFileName = rootname + comboGroupName + ".fasta";
1778                         if (uniqueNames.count(fastaFileName) == 0) {
1779                             outputNames.push_back(fastaFileName);
1780                             outputTypes["fasta"].push_back(fastaFileName);
1781                             uniqueNames.insert(fastaFileName);
1782                         }
1783                         
1784                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1785                         m->openOutputFile(fastaFileName, temp);         temp.close();
1786                     }
1787                                 }
1788                         }
1789                 }
1790                 
1791                 bool allBlank = true;
1792                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1793                         if (barcodeNameVector[i] != "") {
1794                                 allBlank = false;
1795                                 break;
1796                         }
1797                 }
1798                 for (int i = 0; i < primerNameVector.size(); i++) {
1799                         if (primerNameVector[i] != "") {
1800                                 allBlank = false;
1801                                 break;
1802                         }
1803                 }
1804         
1805                 if (allBlank) {
1806                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1807                         allFiles = false;
1808                         return false;
1809                 }
1810                 
1811                 return true;
1812                 
1813         }
1814         catch(exception& e) {
1815                 m->errorOut(e, "MakeContigsCommand", "getOligos");
1816                 exit(1);
1817         }
1818 }
1819 //********************************************************************/
1820 string MakeContigsCommand::reverseOligo(string oligo){
1821         try {
1822         string reverse = "";
1823         
1824         for(int i=oligo.length()-1;i>=0;i--){
1825             
1826             if(oligo[i] == 'A')         {       reverse += 'T'; }
1827             else if(oligo[i] == 'T'){   reverse += 'A'; }
1828             else if(oligo[i] == 'U'){   reverse += 'A'; }
1829             
1830             else if(oligo[i] == 'G'){   reverse += 'C'; }
1831             else if(oligo[i] == 'C'){   reverse += 'G'; }
1832             
1833             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1834             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1835             
1836             else if(oligo[i] == 'M'){   reverse += 'K'; }
1837             else if(oligo[i] == 'K'){   reverse += 'M'; }
1838             
1839             else if(oligo[i] == 'W'){   reverse += 'W'; }
1840             else if(oligo[i] == 'S'){   reverse += 'S'; }
1841             
1842             else if(oligo[i] == 'B'){   reverse += 'V'; }
1843             else if(oligo[i] == 'V'){   reverse += 'B'; }
1844             
1845             else if(oligo[i] == 'D'){   reverse += 'H'; }
1846             else if(oligo[i] == 'H'){   reverse += 'D'; }
1847             
1848             else                                                {       reverse += 'N'; }
1849         }
1850         
1851         
1852         return reverse;
1853     }
1854         catch(exception& e) {
1855                 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
1856                 exit(1);
1857         }
1858 }
1859 //**********************************************************************************************************************
1860 vector<int> MakeContigsCommand::convertQual(string qual) {
1861         try {
1862                 vector<int> qualScores;
1863         bool negativeScores = false;
1864                 
1865                 for (int i = 0; i < qual.length(); i++) { 
1866             
1867             int temp = 0;
1868             temp = int(qual[i]);
1869             if (format == "illumina") {
1870                 temp -= 64; //char '@'
1871             }else if (format == "illumina1.8+") {
1872                     temp -= int('!'); //char '!'
1873             }else if (format == "solexa") {
1874                 temp = int(convertTable[temp]); //convert to sanger
1875                 temp -= int('!'); //char '!'
1876             }else {
1877                 temp -= int('!'); //char '!'
1878             }
1879             
1880             if (temp < -5) { negativeScores = true; }
1881                         qualScores.push_back(temp);
1882                 }
1883                 
1884         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
1885         
1886                 return qualScores;
1887         }
1888         catch(exception& e) {
1889                 m->errorOut(e, "MakeContigsCommand", "convertQual");
1890                 exit(1);
1891         }
1892 }
1893
1894 //**********************************************************************************************************************
1895
1896
1897
1898