2 // makecontigscommand.cpp
5 // Created by Sarah Westcott on 5/15/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "makecontigscommand.h"
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){
14 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15 CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16 CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17 CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18 CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19 CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20 CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22 CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23 CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
28 CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30 CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35 CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36 CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
42 vector<string> myArray;
43 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
47 m->errorOut(e, "MakeContigsCommand", "setParameters");
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){
54 string helpString = "";
55 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56 helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57 helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60 helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61 helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n";
62 helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n";
63 helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n";
64 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65 helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process. \n";
66 helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
67 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70 //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
74 helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
75 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
76 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n";
77 helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
78 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
79 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
81 helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
82 helpString += "The make.contigs command should be in the following format: \n";
83 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
84 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
88 m->errorOut(e, "MakeContigsCommand", "getHelpString");
92 //**********************************************************************************************************************
93 string MakeContigsCommand::getOutputPattern(string type) {
97 if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; }
98 else if (type == "group") { pattern = "[filename],[tag],contigs.groups"; }
99 else if (type == "report") { pattern = "[filename],[tag],contigs.report"; }
100 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
104 catch(exception& e) {
105 m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
109 //**********************************************************************************************************************
110 MakeContigsCommand::MakeContigsCommand(){
112 abort = true; calledHelp = true;
114 vector<string> tempOutNames;
115 outputTypes["fasta"] = tempOutNames;
116 outputTypes["group"] = tempOutNames;
117 outputTypes["report"] = tempOutNames;
119 catch(exception& e) {
120 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
124 //**********************************************************************************************************************
125 MakeContigsCommand::MakeContigsCommand(string option) {
127 abort = false; calledHelp = false;
128 createFileGroup = false; createOligosGroup = false;
130 //allow user to run help
131 if(option == "help") { help(); abort = true; calledHelp = true; }
132 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
135 vector<string> myArray = setParameters();
137 OptionParser parser(option);
138 map<string, string> parameters = parser.getParameters();
140 ValidParameters validParameter("pairwise.seqs");
141 map<string, string>::iterator it;
143 //check to make sure all parameters are valid for command
144 for (it = parameters.begin(); it != parameters.end(); it++) {
145 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
148 //initialize outputTypes
149 vector<string> tempOutNames;
150 outputTypes["fasta"] = tempOutNames;
151 outputTypes["report"] = tempOutNames;
152 outputTypes["group"] = tempOutNames;
155 //if the user changes the input directory command factory will send this info to us in the output parameter
156 string inputDir = validParameter.validFile(parameters, "inputdir", false);
157 if (inputDir == "not found"){ inputDir = ""; }
160 it = parameters.find("ffastq");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["ffastq"] = inputDir + it->second; }
168 it = parameters.find("rfastq");
169 //user has given a template file
170 if(it != parameters.end()){
171 path = m->hasPath(it->second);
172 //if the user has not given a path then, add inputdir. else leave path alone.
173 if (path == "") { parameters["rfastq"] = inputDir + it->second; }
176 it = parameters.find("ffasta");
177 //user has given a template file
178 if(it != parameters.end()){
179 path = m->hasPath(it->second);
180 //if the user has not given a path then, add inputdir. else leave path alone.
181 if (path == "") { parameters["ffasta"] = inputDir + it->second; }
184 it = parameters.find("rfasta");
185 //user has given a template file
186 if(it != parameters.end()){
187 path = m->hasPath(it->second);
188 //if the user has not given a path then, add inputdir. else leave path alone.
189 if (path == "") { parameters["rfasta"] = inputDir + it->second; }
192 it = parameters.find("fqfile");
193 //user has given a template file
194 if(it != parameters.end()){
195 path = m->hasPath(it->second);
196 //if the user has not given a path then, add inputdir. else leave path alone.
197 if (path == "") { parameters["fqfile"] = inputDir + it->second; }
200 it = parameters.find("rqfile");
201 //user has given a template file
202 if(it != parameters.end()){
203 path = m->hasPath(it->second);
204 //if the user has not given a path then, add inputdir. else leave path alone.
205 if (path == "") { parameters["rqfile"] = inputDir + it->second; }
208 it = parameters.find("file");
209 //user has given a template file
210 if(it != parameters.end()){
211 path = m->hasPath(it->second);
212 //if the user has not given a path then, add inputdir. else leave path alone.
213 if (path == "") { parameters["file"] = inputDir + it->second; }
216 it = parameters.find("oligos");
217 //user has given a template file
218 if(it != parameters.end()){
219 path = m->hasPath(it->second);
220 //if the user has not given a path then, add inputdir. else leave path alone.
221 if (path == "") { parameters["oligos"] = inputDir + it->second; }
224 it = parameters.find("findex");
225 //user has given a template file
226 if(it != parameters.end()){
227 path = m->hasPath(it->second);
228 //if the user has not given a path then, add inputdir. else leave path alone.
229 if (path == "") { parameters["findex"] = inputDir + it->second; }
232 it = parameters.find("rindex");
233 //user has given a template file
234 if(it != parameters.end()){
235 path = m->hasPath(it->second);
236 //if the user has not given a path then, add inputdir. else leave path alone.
237 if (path == "") { parameters["rindex"] = inputDir + it->second; }
241 ffastqfile = validParameter.validFile(parameters, "ffastq", true);
242 if (ffastqfile == "not open") { abort = true; }
243 else if (ffastqfile == "not found") { ffastqfile = ""; }
245 rfastqfile = validParameter.validFile(parameters, "rfastq", true);
246 if (rfastqfile == "not open") { abort = true; }
247 else if (rfastqfile == "not found") { rfastqfile = ""; }
249 ffastafile = validParameter.validFile(parameters, "ffasta", true);
250 if (ffastafile == "not open") { abort = true; }
251 else if (ffastafile == "not found") { ffastafile = ""; }
253 rfastafile = validParameter.validFile(parameters, "rfasta", true);
254 if (rfastafile == "not open") { abort = true; }
255 else if (rfastafile == "not found") { rfastafile = ""; }
257 fqualfile = validParameter.validFile(parameters, "fqfile", true);
258 if (fqualfile == "not open") { abort = true; }
259 else if (fqualfile == "not found") { fqualfile = ""; }
261 rqualfile = validParameter.validFile(parameters, "rqfile", true);
262 if (rqualfile == "not open") { abort = true; }
263 else if (rqualfile == "not found") { rqualfile = ""; }
265 file = validParameter.validFile(parameters, "file", true);
266 if (file == "not open") { abort = true; }
267 else if (file == "not found") { file = ""; }
270 if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
271 if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272 if ((ffastqfile != "") && (rfastqfile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
273 if ((ffastqfile == "") && (rfastqfile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
274 if ((ffastafile != "") && (rfastafile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
275 if ((ffastafile == "") && (rfastafile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
276 if ((fqualfile != "") && (rqualfile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
277 if ((fqualfile == "") && (rqualfile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
278 if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
279 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
282 oligosfile = validParameter.validFile(parameters, "oligos", true);
283 if (oligosfile == "not found") { oligosfile = ""; }
284 else if(oligosfile == "not open") { abort = true; }
285 else { m->setOligosFile(oligosfile); }
287 findexfile = validParameter.validFile(parameters, "findex", true);
288 if (findexfile == "not found") { findexfile = ""; }
289 else if(findexfile == "not open") { abort = true; }
291 rindexfile = validParameter.validFile(parameters, "rindex", true);
292 if (rindexfile == "not found") { rindexfile = ""; }
293 else if(rindexfile == "not open") { abort = true; }
295 if ((rindexfile != "") || (findexfile != "")) {
296 if (oligosfile == ""){
297 oligosfile = m->getOligosFile();
298 if (oligosfile != "") { m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n"); }
300 m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
304 //can only use an index file with the fastq parameters not fasta and qual
305 if ((ffastafile != "") || (rfastafile != "")) {
306 m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
310 //if the user changes the output directory command factory will send this info to us in the output parameter
311 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
316 //check for optional parameter and set defaults
317 // ...at some point should added some additional type checking...
319 temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; }
320 m->mothurConvert(temp, match);
322 temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; }
323 m->mothurConvert(temp, misMatch);
324 if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
326 temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; }
327 m->mothurConvert(temp, gapOpen);
328 if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
330 temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; }
331 m->mothurConvert(temp, gapExtend);
332 if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
334 temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; }
335 m->mothurConvert(temp, insert);
336 if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
338 temp = validParameter.validFile(parameters, "deltaq", false); if (temp == "not found"){ temp = "6"; }
339 m->mothurConvert(temp, deltaq);
341 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
342 m->setProcessors(temp);
343 m->mothurConvert(temp, processors);
345 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
346 m->mothurConvert(temp, bdiffs);
348 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
349 m->mothurConvert(temp, pdiffs);
351 // temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
352 // m->mothurConvert(temp, ldiffs);
355 // temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
356 // m->mothurConvert(temp, sdiffs);
359 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
360 m->mothurConvert(temp, tdiffs);
362 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } //+ ldiffs + sdiffs;
364 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
365 allFiles = m->isTrue(temp);
368 temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; }
369 trimOverlap = m->isTrue(temp);
371 align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
372 if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
374 format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "illumina1.8+"; }
376 if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) {
377 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
381 //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
382 for (int i = -64; i < 65; i++) {
383 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
384 convertTable.push_back(temp);
389 catch(exception& e) {
390 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
394 //**********************************************************************************************************************
395 int MakeContigsCommand::execute(){
397 if (abort == true) { if (calledHelp) { return 0; } return 2; }
399 //read ffastq and rfastq files creating fasta and qual files.
400 //this function will create a forward and reverse, fasta and qual files for each processor.
401 //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual. filesToProcess is for each filepair in the file parameter file. for ffastq and rfastq this will be size 1.
402 unsigned long int numReads = 0;
403 int start = time(NULL);
405 m->mothurOut("Reading fastq data...\n");
406 vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
407 m->mothurOut("Done.\n");
409 if (m->control_pressed) { return 0; }
411 map<string, string> cvars;
412 string compOutputDir = outputDir;
413 if (outputDir == "") { compOutputDir = m->hasPath(file); }
414 cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
416 string compositeGroupFile = getOutputFileName("group",cvars);
417 cvars["[tag]"] = "trim";
418 string compositeFastaFile = getOutputFileName("fasta",cvars);
419 cvars["[tag]"] = "scrap";
420 string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
422 string compositeMisMatchFile = getOutputFileName("report",cvars);
424 if (filesToProcess.size() > 1) { //clear files for append below
425 ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
426 m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
427 m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
428 m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
429 outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
430 outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
431 outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
434 map<string, int> totalGroupCounts;
436 for (int l = 0; l < filesToProcess.size(); l++) {
438 m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
442 vector<vector<string> > fastaFileNames;
443 createOligosGroup = false;
444 string outputGroupFileName;
445 map<string, string> variables;
446 string thisOutputDir = outputDir;
447 if (outputDir == "") { thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
448 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
449 variables["[tag]"] = "";
450 if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); }
451 if (createOligosGroup || createFileGroup) {
452 outputGroupFileName = getOutputFileName("group",variables);
455 //give group in file file precedence
456 if (createFileGroup) { createOligosGroup = false; }
458 variables["[tag]"] = "trim";
459 string outFastaFile = getOutputFileName("fasta",variables);
460 variables["[tag]"] = "scrap";
461 string outScrapFastaFile = getOutputFileName("fasta",variables);
462 variables["[tag]"] = "";
463 string outMisMatchFile = getOutputFileName("report",variables);
465 m->mothurOut("Making contigs...\n");
466 createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
468 //remove temp fasta and qual files
469 for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } }
471 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
474 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
475 map<string, string>::iterator it;
476 set<string> namesToRemove;
477 for(int i=0;i<fastaFileNames.size();i++){
478 for(int j=0;j<fastaFileNames[0].size();j++){
479 if (fastaFileNames[i][j] != "") {
480 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
481 if(m->isBlank(fastaFileNames[i][j])){
482 m->mothurRemove(fastaFileNames[i][j]);
483 namesToRemove.insert(fastaFileNames[i][j]);
485 it = uniqueFastaNames.find(fastaFileNames[i][j]);
486 if (it == uniqueFastaNames.end()) {
487 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
495 //remove names for outputFileNames, just cleans up the output
496 vector<string> outputNames2;
497 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498 outputNames = outputNames2;
500 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
502 m->openInputFile(it->first, in);
505 string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
506 thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
507 m->openOutputFile(thisGroupName, out);
510 if (m->control_pressed) { break; }
512 Sequence currSeq(in); m->gobble(in);
513 out << currSeq.getName() << '\t' << it->second << endl;
520 if (createFileGroup || createOligosGroup) {
522 m->openOutputFile(outputGroupFileName, outGroup);
523 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
524 outGroup << itGroup->first << '\t' << itGroup->second << endl;
529 if (filesToProcess.size() > 1) { //merge into large combo files
530 if (createFileGroup || createOligosGroup) {
533 m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
534 outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
536 m->appendFiles(outputGroupFileName, compositeGroupFile);
537 if (!allFiles) { m->mothurRemove(outputGroupFileName); }
538 else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
540 for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
541 map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
542 if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
543 else { itTemp->second += itGroups->second; } //existing group, update total
546 if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); }
547 else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); }
548 m->appendFiles(outFastaFile, compositeFastaFile);
549 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
551 m->mothurRemove(outMisMatchFile);
552 m->mothurRemove(outFastaFile);
553 m->mothurRemove(outScrapFastaFile);
555 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
556 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
557 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
560 totalGroupCounts = groupCounts;
561 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
562 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
563 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
564 if (createFileGroup || createOligosGroup) {
565 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
568 m->mothurOut("Done.\n");
571 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
573 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
575 //output group counts
576 m->mothurOutEndLine();
578 if (totalGroupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
579 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
580 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
582 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
584 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
586 string currentFasta = "";
587 itTypes = outputTypes.find("fasta");
588 if (itTypes != outputTypes.end()) {
589 if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
592 string currentGroup = "";
593 itTypes = outputTypes.find("group");
594 if (itTypes != outputTypes.end()) {
595 if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
598 //output files created by command
599 m->mothurOutEndLine();
600 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
601 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
602 m->mothurOutEndLine();
606 catch(exception& e) {
607 m->errorOut(e, "MakeContigsCommand", "execute");
611 //**********************************************************************************************************************
612 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
614 vector< vector< vector<string> > > filesToProcess;
616 if (ffastqfile != "") { //reading one file
617 vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
618 //adjust for really large processors or really small files
619 if (numReads == 0) { m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
620 if (numReads < processors) {
621 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
622 files.resize(numReads);
623 processors = numReads;
625 filesToProcess.push_back(files);
626 }else if (file != "") { //reading multiple files
627 //return only valid pairs
628 vector< vector<string> > filePairsToProcess = readFileNames(file);
630 if (m->control_pressed) { return filesToProcess; }
632 if (filePairsToProcess.size() != 0) {
633 for (int i = 0; i < filePairsToProcess.size(); i++) {
635 if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
637 unsigned long int thisFilesReads;
638 vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
640 //adjust for really large processors or really small files
641 if (thisFilesReads < processors) {
642 m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n");
643 for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
644 //remove from file2Group if necassary
645 map<int, string> cFile2Group;
646 for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
647 if ((it->first) < i) { cFile2Group[it->first] = it->second; }
648 else if ((it->first) == i) { } //do nothing, we removed files for i
649 else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
651 file2Group = cFile2Group;
653 filesToProcess.push_back(files);
654 numReads += thisFilesReads;
658 if (numReads == 0) { m->control_pressed = true; }
660 }else if (ffastafile != "") {
661 vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
662 //adjust for really large processors or really small files
663 if (numReads == 0) { m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
664 if (numReads < processors) {
665 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
666 files.resize(numReads);
667 processors = numReads;
669 filesToProcess.push_back(files);
670 }else { m->control_pressed = true; } //should not get here
672 return filesToProcess;
674 catch(exception& e) {
675 m->errorOut(e, "MakeContigsCommand", "preProcessData");
679 //**********************************************************************************************************************
680 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
683 vector<int> processIDS;
685 map<int, string>::iterator it = file2Group.find(index);
686 if (it != file2Group.end()) { group = it->second; }
688 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
691 //loop through and create all the processes you want
692 while (process != processors-1) {
696 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
699 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
704 for(int i=0;i<tempFASTAFileNames.size();i++){
705 for(int j=0;j<tempFASTAFileNames[i].size();j++){
706 if (tempFASTAFileNames[i][j] != "") {
707 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
708 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
714 num = driver(files[process],
715 outputFasta + toString(getpid()) + ".temp",
716 outputScrapFasta + toString(getpid()) + ".temp",
717 outputMisMatches + toString(getpid()) + ".temp",
718 tempFASTAFileNames, process, group);
720 //pass groupCounts to parent
722 string tempFile = toString(getpid()) + ".num.temp";
723 m->openOutputFile(tempFile, out);
725 if (createFileGroup || createOligosGroup) {
726 out << groupCounts.size() << endl;
728 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
729 out << it->first << '\t' << it->second << endl;
732 out << groupMap.size() << endl;
733 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
734 out << it->first << '\t' << it->second << endl;
741 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
742 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
748 m->openOutputFile(outputFasta, temp); temp.close();
749 m->openOutputFile(outputScrapFasta, temp); temp.close();
752 num = driver(files[processors-1], outputFasta, outputScrapFasta, outputMisMatches, fastaFileNames, processors-1, group);
754 //force parent to wait until all the processes are done
755 for (int i=0;i<processIDS.size();i++) {
756 int temp = processIDS[i];
760 for (int i = 0; i < processIDS.size(); i++) {
762 string tempFile = toString(processIDS[i]) + ".num.temp";
763 m->openInputFile(tempFile, in);
765 in >> tempNum; num += tempNum; m->gobble(in);
767 if (createFileGroup || createOligosGroup) {
769 in >> tempNum; m->gobble(in);
772 for (int j = 0; j < tempNum; j++) {
774 in >> group >> groupNum; m->gobble(in);
776 map<string, int>::iterator it = groupCounts.find(group);
777 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
778 else { groupCounts[it->first] += groupNum; }
781 in >> tempNum; m->gobble(in);
783 for (int j = 0; j < tempNum; j++) {
784 string group, seqName;
785 in >> seqName >> group; m->gobble(in);
787 map<string, string>::iterator it = groupMap.find(seqName);
788 if (it == groupMap.end()) { groupMap[seqName] = group; }
789 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
793 in.close(); m->mothurRemove(tempFile);
797 //////////////////////////////////////////////////////////////////////////////////////////////////////
798 //Windows version shared memory, so be careful when passing variables through the contigsData struct.
799 //Above fork() will clone, so memory is separate, but that's not the case with windows,
800 //////////////////////////////////////////////////////////////////////////////////////////////////////
802 vector<contigsData*> pDataArray;
803 DWORD dwThreadIdArray[processors-1];
804 HANDLE hThreadArray[processors-1];
806 //Create processor worker threads.
807 for( int h=0; h<processors-1; h++ ){
808 string extension = "";
809 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
810 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
815 for(int i=0;i<tempFASTAFileNames.size();i++){
816 for(int j=0;j<tempFASTAFileNames[i].size();j++){
817 if (tempFASTAFileNames[i][j] != "") {
818 tempFASTAFileNames[i][j] += extension;
819 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
825 contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
826 pDataArray.push_back(tempcontig);
828 hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
831 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
835 string extension = toString(processors-1) + ".temp";
837 for(int i=0;i<tempFASTAFileNames.size();i++){
838 for(int j=0;j<tempFASTAFileNames[i].size();j++){
839 if (tempFASTAFileNames[i][j] != "") {
840 tempFASTAFileNames[i][j] += extension;
841 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
849 m->openOutputFile(outputFasta, temp); temp.close();
850 m->openOutputFile(outputScrapFasta, temp); temp.close();
853 processIDS.push_back(processors-1);
854 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
856 //Wait until all threads have terminated.
857 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
859 //Close all thread handles and free memory allocations.
860 for(int i=0; i < pDataArray.size(); i++){
861 num += pDataArray[i]->count;
862 if (!pDataArray[i]->done) {
863 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true;
865 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
866 map<string, int>::iterator it2 = groupCounts.find(it->first);
867 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
868 else { groupCounts[it->first] += it->second; }
870 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
871 map<string, string>::iterator it2 = groupMap.find(it->first);
872 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
873 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
875 CloseHandle(hThreadArray[i]);
876 delete pDataArray[i];
881 for (int i = 0; i < processIDS.size(); i++) {
882 m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
883 m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
885 m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
886 m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
888 m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
889 m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
892 for(int j=0;j<fastaFileNames.size();j++){
893 for(int k=0;k<fastaFileNames[j].size();k++){
894 if (fastaFileNames[j][k] != "") {
895 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
896 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
905 catch(exception& e) {
906 m->errorOut(e, "MakeContigsCommand", "createProcesses");
910 //**********************************************************************************************************************
911 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
914 Alignment* alignment;
915 if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
916 else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
919 string thisffastafile = files[0];
920 string thisfqualfile = files[1];
921 string thisrfastafile = files[2];
922 string thisrqualfile = files[3];
923 string thisfindexfile = files[4];
924 string thisrindexfile = files[5];
926 if (m->debug) { m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
928 ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
929 ofstream outFasta, outMisMatch, outScrapFasta;
930 m->openInputFile(thisffastafile, inFFasta);
931 m->openInputFile(thisrfastafile, inRFasta);
932 if (thisfqualfile != "") {
933 m->openInputFile(thisfqualfile, inFQual);
934 m->openInputFile(thisrqualfile, inRQual);
937 if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex); }
938 if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex); }
940 m->openOutputFile(outputFasta, outFasta);
941 m->openOutputFile(outputScrapFasta, outScrapFasta);
942 m->openOutputFile(outputMisMatches, outMisMatch);
943 outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";
945 TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
947 while ((!inFFasta.eof()) && (!inRFasta.eof())) {
949 if (m->control_pressed) { break; }
952 string trashCode = "";
953 int currentSeqsDiffs = 0;
955 //read seqs and quality info
956 Sequence fSeq(inFFasta); m->gobble(inFFasta);
957 Sequence rSeq(inRFasta); m->gobble(inRFasta);
958 QualityScores* fQual = NULL; QualityScores* rQual = NULL;
959 if (thisfqualfile != "") {
960 fQual = new QualityScores(inFQual); m->gobble(inFQual);
961 rQual = new QualityScores(inRQual); m->gobble(inRQual);
963 Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE");
964 if (thisfindexfile != "") {
965 Sequence temp(inFIndex); m->gobble(inFIndex);
966 findexBarcode.setAligned(temp.getAligned());
969 if (thisrindexfile != "") {
970 Sequence temp(inRIndex); m->gobble(inRIndex);
971 rindexBarcode.setAligned(temp.getAligned());
974 int barcodeIndex = 0;
977 if(barcodes.size() != 0){
978 if (thisfqualfile != "") {
979 if ((thisfindexfile != "") || (thisrindexfile != "")) {
980 success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
982 success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
985 success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
987 if(success > bdiffs) { trashCode += 'b'; }
988 else{ currentSeqsDiffs += success; }
991 if(primers.size() != 0){
992 if (thisfqualfile != "") {
993 success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
995 success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
997 if(success > pdiffs) { trashCode += 'f'; }
998 else{ currentSeqsDiffs += success; }
1001 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
1003 //flip the reverse reads
1004 rSeq.reverseComplement();
1005 if (thisfqualfile != "") { rQual->flipQScores(); }
1008 alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1009 map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1010 map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1011 fSeq.setAligned(alignment->getSeqAAln());
1012 rSeq.setAligned(alignment->getSeqBAln());
1013 int length = fSeq.getAligned().length();
1015 //traverse alignments merging into one contiguous seq
1017 int numMismatches = 0;
1018 string seq1 = fSeq.getAligned();
1019 string seq2 = rSeq.getAligned();
1020 vector<int> scores1, scores2;
1021 if (thisfqualfile != "") {
1022 scores1 = fQual->getQualityScores();
1023 scores2 = rQual->getQualityScores();
1024 delete fQual; delete rQual;
1027 // if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1028 int overlapStart = fSeq.getStartPos();
1029 int seq2Start = rSeq.getStartPos();
1031 //bigger of the 2 starting positions is the location of the overlapping start
1032 if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1033 overlapStart = seq2Start;
1034 for (int i = 0; i < overlapStart; i++) { contig += seq1[i]; }
1035 }else { //seq1 starts later so take from 0 to overlapStart from seq2
1036 for (int i = 0; i < overlapStart; i++) { contig += seq2[i]; }
1039 int seq1End = fSeq.getEndPos();
1040 int seq2End = rSeq.getEndPos();
1041 int overlapEnd = seq1End;
1042 if (seq2End < overlapEnd) { overlapEnd = seq2End; } //smallest end position is where overlapping ends
1044 int oStart = contig.length();
1045 //cout << fSeq.getAligned() << endl; cout << rSeq.getAligned() << endl;
1046 for (int i = overlapStart; i < overlapEnd; i++) {
1047 //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl;
1048 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1050 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1051 if (thisfqualfile != "") {
1052 if (scores2[BBaseMap[i]] <= insert) { } //
1053 else { contig += seq2[i]; }
1054 }else { contig += seq2[i]; } //with no quality info, then we keep it?
1055 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1056 if (thisfqualfile != "") {
1057 if (scores1[ABaseMap[i]] <= insert) { } //
1058 else { contig += seq1[i]; }
1059 }else { contig += seq1[i]; } //with no quality info, then we keep it?
1060 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1061 if (thisfqualfile != "") {
1062 if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1064 if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1066 }else { //if no, base becomes n
1070 }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1071 }else { //should never get here
1072 m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1075 int oend = contig.length();
1076 if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1077 for (int i = overlapEnd; i < length; i++) { contig += seq2[i]; }
1078 }else { //seq2 ends before seq1 so take from overlap to length from seq1
1079 for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; }
1081 //cout << contig << endl;
1083 if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } }
1085 if(trashCode.length() == 0){
1086 bool ignore = false;
1088 if (m->debug) { m->mothurOut(fSeq.getName()); }
1090 if (createOligosGroup) {
1091 if(barcodes.size() != 0){
1092 string thisGroup = barcodeNameVector[barcodeIndex];
1093 if (primers.size() != 0) {
1094 if (primerNameVector[primerIndex] != "") {
1095 if(thisGroup != "") {
1096 thisGroup += "." + primerNameVector[primerIndex];
1098 thisGroup = primerNameVector[primerIndex];
1103 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1105 int pos = thisGroup.find("ignore");
1106 if (pos == string::npos) {
1107 groupMap[fSeq.getName()] = thisGroup;
1109 map<string, int>::iterator it = groupCounts.find(thisGroup);
1110 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
1111 else { groupCounts[it->first] ++; }
1112 }else { ignore = true; }
1115 }else if (createFileGroup) {
1116 int pos = group.find("ignore");
1117 if (pos == string::npos) {
1118 groupMap[fSeq.getName()] = group;
1120 map<string, int>::iterator it = groupCounts.find(group);
1121 if (it == groupCounts.end()) { groupCounts[group] = 1; }
1122 else { groupCounts[it->first] ++; }
1123 }else { ignore = true; }
1125 if (m->debug) { m->mothurOut("\n"); }
1127 if(allFiles && !ignore){
1129 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1130 output << ">" << fSeq.getName() << endl << contig << endl;
1135 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1137 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; } }
1138 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1141 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1146 if((num) % 1000 == 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); }
1150 if((num) % 1000 != 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); }
1155 outScrapFasta.close();
1156 outMisMatch.close();
1157 if (thisfqualfile != "") {
1163 if (m->control_pressed) { m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches); }
1167 catch(exception& e) {
1168 m->errorOut(e, "MakeContigsCommand", "driver");
1172 //**********************************************************************************************************************
1173 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1175 vector< vector<string> > files;
1176 //maps processors number to file pointer
1177 map<int, vector<ofstream*> > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1178 map<int, vector<ofstream*> >::iterator it;
1180 //create files to write to
1181 for (int i = 0; i < processors; i++) {
1182 vector<ofstream*> temp;
1183 ofstream* outFF = new ofstream; temp.push_back(outFF);
1184 ofstream* outFQ = new ofstream; temp.push_back(outFQ);
1185 ofstream* outRF = new ofstream; temp.push_back(outRF);
1186 ofstream* outRQ = new ofstream; temp.push_back(outRQ);
1187 ofstream* outFI = new ofstream; temp.push_back(outFI);
1188 ofstream* outRI = new ofstream; temp.push_back(outRI);
1189 tempfiles[i] = temp;
1191 vector<string> names;
1192 string thisOutputDir = outputDir;
1193 if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1194 string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1195 string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1196 string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1197 string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1198 string findexfilename = ""; string rindexfilename = "";
1199 noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1200 if (findex != "") { findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI); noneOk = true; }
1201 if (rindex != "") { rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp"; m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1202 names.push_back(ffastafilename); names.push_back(fqualfilename);
1203 names.push_back(rfastafilename); names.push_back(rqualfilename);
1204 names.push_back(findexfilename); names.push_back(rindexfilename);
1205 files.push_back(names);
1207 m->openOutputFile(ffastafilename, *outFF);
1208 m->openOutputFile(rfastafilename, *outRF);
1209 m->openOutputFile(fqualfilename, *outFQ);
1210 m->openOutputFile(rqualfilename, *outRQ);
1213 if (m->control_pressed) {
1214 //close files, delete ofstreams
1215 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1217 for (int i = 0; i < files.size(); i++) {
1218 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } }
1223 m->openInputFile(ffastq, inForward);
1226 m->openInputFile(rfastq, inReverse);
1228 ifstream infIndex, inrIndex;
1229 bool findexIsGood = false;
1230 bool rindexIsGood = false;
1231 if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1232 if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1235 map<string, fastqRead> uniques;
1236 map<string, fastqRead> iUniques;
1237 map<string, pairFastqRead> pairUniques;
1238 map<string, fastqRead>::iterator itUniques;
1239 while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1241 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1243 //get a read from forward and reverse fastq files
1244 bool ignoref, ignorer, ignorefi, ignoreri;
1245 fastqRead thisFread, thisRread, thisFIread, thisRIread;
1246 if (!inForward.eof()) { thisFread = readFastq(inForward, ignoref); }
1247 else { ignoref = true; }
1248 if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer); }
1249 else { ignorer = true; }
1250 if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1251 else { ignorefi = true; }
1252 if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri); if (inrIndex.eof()) { rindexIsGood = false; } }
1253 else { ignoreri = true; }
1255 bool allowOne = false;
1256 if ((findex == "") || (rindex == "")) { allowOne = true; }
1257 vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1258 vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1260 //add in index info if provided
1261 vector<pairFastqRead> reads = frReads;
1262 if ((findex != "") || (rindex != "")) { reads = mergeReads(frReads, friReads, pairUniques); }
1264 for (int i = 0; i < reads.size(); i++) {
1265 fastqRead fread = reads[i].forward;
1266 fastqRead rread = reads[i].reverse;
1267 fastqRead firead = reads[i].findex;
1268 fastqRead riread = reads[i].rindex;
1270 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1272 //if (checkReads(fread, rread, ffastq, rfastq)) {
1273 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1275 //if the reads are okay write to output files
1276 int process = count % processors;
1278 *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1279 *(tempfiles[process][1]) << ">" << fread.name << endl;
1280 for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1281 *(tempfiles[process][1]) << endl;
1282 *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1283 *(tempfiles[process][3]) << ">" << rread.name << endl;
1284 for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1285 *(tempfiles[process][3]) << endl;
1286 if (findex != "") { *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1287 if (rindex != "") { *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1292 if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1297 if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1299 if (uniques.size() != 0) {
1300 for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1301 if (m->control_pressed) { break; }
1302 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1304 for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1305 if (m->control_pressed) { break; }
1306 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1308 m->mothurOutEndLine();
1311 //close files, delete ofstreams
1312 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1315 if (findex != "") { infIndex.close(); }
1316 if (rindex != "") { inrIndex.close(); }
1320 catch(exception& e) {
1321 m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1325 //**********************************************************************************************************************
1326 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1328 vector< vector<string> > files;
1329 //maps processors number to file pointer
1330 map<int, vector<ofstream*> > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1331 map<int, vector<ofstream*> >::iterator it;
1333 //create files to write to
1334 for (int i = 0; i < processors; i++) {
1335 vector<ofstream*> temp;
1336 ofstream* outFF = new ofstream; temp.push_back(outFF);
1337 ofstream* outFQ = new ofstream; temp.push_back(outFQ);
1338 ofstream* outRF = new ofstream; temp.push_back(outRF);
1339 ofstream* outRQ = new ofstream; temp.push_back(outRQ);
1340 tempfiles[i] = temp;
1342 vector<string> names;
1343 string thisOutputDir = outputDir;
1344 if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1345 string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1346 string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1347 string fqualfilename = "";
1348 if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp"; m->openOutputFile(fqualfilename, *outFQ); }
1349 string rqualfilename = "";
1350 if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1351 string findexfilename = ""; string rindexfilename = "";
1352 names.push_back(ffastafilename); names.push_back(fqualfilename);
1353 names.push_back(rfastafilename); names.push_back(rqualfilename);
1354 names.push_back(findexfilename); names.push_back(rindexfilename);
1355 files.push_back(names);
1357 m->openOutputFile(ffastafilename, *outFF);
1358 m->openOutputFile(rfastafilename, *outRF);
1361 if (m->control_pressed) {
1362 //close files, delete ofstreams
1363 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1365 for (int i = 0; i < files.size(); i++) {
1366 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1370 ifstream inForwardFasta;
1371 m->openInputFile(ffasta, inForwardFasta);
1373 ifstream inReverseFasta;
1374 m->openInputFile(rfasta, inReverseFasta);
1376 ifstream inForwardQual, inReverseQual;
1377 if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1380 map<string, fastqRead> uniques;
1381 map<string, fastqRead>::iterator itUniques;
1382 while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1384 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1386 //get a reads from forward and reverse fasta files
1387 bool ignoref, ignorer;
1388 fastqRead thisFread, thisRread;
1389 if (!inForwardFasta.eof()) {
1391 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1392 thisFread.name = temp.getName();
1393 thisFread.sequence = temp.getUnaligned();
1394 }else { ignoref = true; }
1395 if (!inReverseFasta.eof()) {
1397 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1398 thisRread.name = temp.getName();
1399 thisRread.sequence = temp.getUnaligned();
1400 }else { ignorer = true; }
1402 //get qual reads if given
1403 if (fqualfile != "") {
1404 if (!inForwardQual.eof() && !ignoref) {
1405 QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1406 //if forward files dont match ignore read
1407 if (thisFread.name != temp.getName()) { ignoref = true; }
1408 else { thisFread.scores = temp.getQualityScores(); }
1409 }else { ignoref = true; }
1410 if (!inReverseQual.eof() && !ignorer) {
1411 QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1412 //if reverse files dont match ignore read
1413 if (thisRread.name != temp.getName()) { ignorer = true; }
1414 else { thisRread.scores = temp.getQualityScores(); }
1415 }else { ignorer = true; }
1418 vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1420 for (int i = 0; i < reads.size(); i++) {
1421 fastqRead fread = reads[i].forward;
1422 fastqRead rread = reads[i].reverse;
1424 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1426 // if (checkReads(fread, rread, ffasta, rfasta)) {
1427 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1429 //if the reads are okay write to output files
1430 int process = count % processors;
1432 *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1433 *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1434 if (fqualfile != "") { //if you have quality info, print it
1435 *(tempfiles[process][1]) << ">" << fread.name << endl;
1436 for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1437 *(tempfiles[process][1]) << endl;
1438 *(tempfiles[process][3]) << ">" << rread.name << endl;
1439 for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1440 *(tempfiles[process][3]) << endl;
1445 if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1450 if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1452 if (uniques.size() != 0) {
1453 for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1454 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1456 m->mothurOutEndLine();
1459 //close files, delete ofstreams
1460 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1461 inReverseFasta.close();
1462 inForwardFasta.close();
1463 if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1467 catch(exception& e) {
1468 m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1472 //**********************************************************************************************************************
1473 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1475 vector<pairFastqRead> reads;
1476 map<string, fastqRead>::iterator itUniques;
1478 if (!ignoref && !ignorer) {
1479 if (forward.name == reverse.name) {
1480 pairFastqRead temp(forward, reverse);
1481 reads.push_back(temp);
1484 //if no match are the names only different by 1 and 2?
1485 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1486 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1487 if (tempFRead == tempRRead) {
1488 if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1489 forward.name = tempFRead;
1490 reverse.name = tempRRead;
1491 pairFastqRead temp(forward, reverse);
1492 reads.push_back(temp);
1498 //look for forward pair
1499 itUniques = uniques.find(forward.name);
1500 if (itUniques != uniques.end()) { //we have the pair for this read
1501 pairFastqRead temp(forward, itUniques->second);
1502 reads.push_back(temp);
1503 uniques.erase(itUniques);
1504 }else { //save this read for later
1505 uniques[forward.name] = forward;
1508 //look for reverse pair
1509 itUniques = uniques.find(reverse.name);
1510 if (itUniques != uniques.end()) { //we have the pair for this read
1511 pairFastqRead temp(itUniques->second, reverse);
1512 reads.push_back(temp);
1513 uniques.erase(itUniques);
1514 }else { //save this read for later
1515 uniques[reverse.name] = reverse;
1520 }else if (!ignoref && ignorer) { //ignore reverse keep forward
1523 pairFastqRead temp(forward, dummy);
1524 reads.push_back(temp);
1526 //look for forward pair
1527 itUniques = uniques.find(forward.name);
1528 if (itUniques != uniques.end()) { //we have the pair for this read
1529 pairFastqRead temp(forward, itUniques->second);
1530 reads.push_back(temp);
1531 uniques.erase(itUniques);
1532 }else { //save this read for later
1533 uniques[forward.name] = forward;
1536 }else if (ignoref && !ignorer) { //ignore forward keep reverse
1539 pairFastqRead temp(dummy, reverse);
1540 reads.push_back(temp);
1542 //look for reverse pair
1543 itUniques = uniques.find(reverse.name);
1544 if (itUniques != uniques.end()) { //we have the pair for this read
1545 pairFastqRead temp(itUniques->second, reverse);
1546 reads.push_back(temp);
1547 uniques.erase(itUniques);
1548 }else { //save this read for later
1549 uniques[reverse.name] = reverse;
1552 }//else ignore both and do nothing
1556 catch(exception& e) {
1557 m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1561 //**********************************************************************************************************************
1562 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1563 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1565 vector<pairFastqRead> reads;
1566 map<string, pairFastqRead>::iterator itUniques;
1568 set<int> foundIndexes;
1569 for (int i = 0; i < thisReads.size(); i++) {
1571 for (int j = 0; j < indexes.size(); j++) {
1573 //incase only one index
1574 string indexName = indexes[j].forward.name;
1575 if (indexName == "") { indexName = indexes[j].reverse.name; }
1577 if (thisReads[i].forward.name == indexName){
1578 thisReads[i].findex = indexes[j].forward;
1579 thisReads[i].rindex = indexes[j].reverse;
1580 reads.push_back(thisReads[i]);
1582 foundIndexes.insert(j);
1587 //look for forward pair
1588 itUniques = uniques.find('i'+thisReads[i].forward.name);
1589 if (itUniques != uniques.end()) { //we have the pair for this read
1590 thisReads[i].findex = itUniques->second.forward;
1591 thisReads[i].rindex = itUniques->second.reverse;
1592 reads.push_back(thisReads[i]);
1593 uniques.erase(itUniques);
1594 }else { //save this read for later
1595 uniques['r'+thisReads[i].forward.name] = thisReads[i];
1600 if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1601 for (int j = 0; j < indexes.size(); j++) {
1602 if (foundIndexes.count(j) == 0) { //we didnt find this one
1603 //incase only one index
1604 string indexName = indexes[j].forward.name;
1605 if (indexName == "") { indexName = indexes[j].reverse.name; }
1607 //look for forward pair
1608 itUniques = uniques.find('r'+indexName);
1609 if (itUniques != uniques.end()) { //we have the pair for this read
1610 pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1611 reads.push_back(temp);
1612 uniques.erase(itUniques);
1613 }else { //save this read for later
1614 uniques['i'+indexName] = indexes[j];
1623 catch(exception& e) {
1624 m->errorOut(e, "MakeContigsCommand", "mergeReads");
1628 //**********************************************************************************************************************
1629 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1635 //read sequence name
1636 string line = m->getline(in); m->gobble(in);
1637 vector<string> pieces = m->splitWhiteSpace(line);
1638 string name = ""; if (pieces.size() != 0) { name = pieces[0]; }
1639 if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1640 else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1641 else { name = name.substr(1); }
1644 string sequence = m->getline(in); m->gobble(in);
1645 if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1647 //read sequence name
1648 line = m->getline(in); m->gobble(in);
1649 pieces = m->splitWhiteSpace(line);
1650 string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; }
1651 if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1652 else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1653 else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1655 //read quality scores
1656 string quality = m->getline(in); m->gobble(in);
1657 if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1659 //sanity check sequence length and number of quality scores match
1660 if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1661 if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1663 vector<int> qualScores = convertQual(quality);
1667 read.sequence = sequence;
1668 read.scores = qualScores;
1670 if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1674 catch(exception& e) {
1675 m->errorOut(e, "MakeContigsCommand", "readFastq");
1679 /**********************************************************************************************************************
1680 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1684 //do sequence lengths match
1685 if (forward.sequence.length() != reverse.sequence.length()) {
1686 m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1690 //do number of qual scores match
1691 if (forward.scores.size() != reverse.scores.size()) {
1692 m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1698 catch(exception& e) {
1699 m->errorOut(e, "MakeContigsCommand", "checkReads");
1703 //***************************************************************************************************************
1704 //lines can be 2, 3, or 4 columns
1705 // forward.fastq reverse.fastq -> 2 column
1706 // groupName forward.fastq reverse.fastq -> 3 column
1707 // forward.fastq reverse.fastq forward.index.fastq reverse.index.fastq -> 4 column
1708 // forward.fastq reverse.fastq none reverse.index.fastq -> 4 column
1709 // forward.fastq reverse.fastq forward.index.fastq none -> 4 column
1710 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1712 vector< vector<string> > files;
1713 string forward, reverse, findex, rindex;
1716 m->openInputFile(filename, in);
1720 if (m->control_pressed) { return files; }
1722 string line = m->getline(in); m->gobble(in);
1723 vector<string> pieces = m->splitWhiteSpace(line);
1726 if (pieces.size() == 2) {
1727 forward = pieces[0];
1728 reverse = pieces[1];
1732 }else if (pieces.size() == 3) {
1734 forward = pieces[1];
1735 reverse = pieces[2];
1738 createFileGroup = true;
1739 }else if (pieces.size() == 4) {
1740 forward = pieces[0];
1741 reverse = pieces[1];
1744 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1745 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1747 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1750 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1752 //check to make sure both are able to be opened
1754 int openForward = m->openInputFile(forward, in2, "noerror");
1756 //if you can't open it, try default location
1757 if (openForward == 1) {
1758 if (m->getDefaultPath() != "") { //default path is set
1759 string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1760 m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1762 openForward = m->openInputFile(tryPath, in3, "noerror");
1768 //if you can't open it, try output location
1769 if (openForward == 1) {
1770 if (m->getOutputDir() != "") { //default path is set
1771 string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1772 m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1774 openForward = m->openInputFile(tryPath, in4, "noerror");
1780 if (openForward == 1) { //can't find it
1781 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n");
1782 }else{ in2.close(); }
1785 int openReverse = m->openInputFile(reverse, in3, "noerror");
1787 //if you can't open it, try default location
1788 if (openReverse == 1) {
1789 if (m->getDefaultPath() != "") { //default path is set
1790 string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1791 m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1793 openReverse = m->openInputFile(tryPath, in3, "noerror");
1799 //if you can't open it, try output location
1800 if (openReverse == 1) {
1801 if (m->getOutputDir() != "") { //default path is set
1802 string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1803 m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1805 openReverse = m->openInputFile(tryPath, in4, "noerror");
1811 if (openReverse == 1) { //can't find it
1812 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n");
1813 }else{ in3.close(); }
1818 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1820 //if you can't open it, try default location
1821 if (openFindex == 1) {
1822 if (m->getDefaultPath() != "") { //default path is set
1823 string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1824 m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1826 openFindex = m->openInputFile(tryPath, in5, "noerror");
1832 //if you can't open it, try output location
1833 if (openFindex == 1) {
1834 if (m->getOutputDir() != "") { //default path is set
1835 string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1836 m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1838 openFindex = m->openInputFile(tryPath, in6, "noerror");
1844 if (openFindex == 1) { //can't find it
1845 m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1852 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1854 //if you can't open it, try default location
1855 if (openRindex == 1) {
1856 if (m->getDefaultPath() != "") { //default path is set
1857 string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1858 m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1860 openRindex = m->openInputFile(tryPath, in8, "noerror");
1866 //if you can't open it, try output location
1867 if (openRindex == 1) {
1868 if (m->getOutputDir() != "") { //default path is set
1869 string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1870 m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1872 openRindex = m->openInputFile(tryPath, in9, "noerror");
1878 if (openRindex == 1) { //can't find it
1879 m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1884 if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1885 file2Group[files.size()] = group;
1886 vector<string> pair;
1887 pair.push_back(forward);
1888 pair.push_back(reverse);
1889 pair.push_back(findex);
1890 pair.push_back(rindex);
1891 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true; }
1892 files.push_back(pair);
1899 catch(exception& e) {
1900 m->errorOut(e, "MakeContigsCommand", "readFileNames");
1904 //***************************************************************************************************************
1905 //illumina data requires paired forward and reverse data
1906 //BARCODE atgcatgc atgcatgc groupName
1907 //PRIMER atgcatgc atgcatgc groupName
1908 //PRIMER atgcatgc atgcatgc
1909 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1912 m->openInputFile(oligosfile, in);
1916 string type, foligo, roligo, group;
1918 int indexPrimer = 0;
1919 int indexBarcode = 0;
1920 set<string> uniquePrimers;
1921 set<string> uniqueBarcodes;
1927 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1930 while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1935 //make type case insensitive
1936 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1940 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1942 for(int i=0;i<foligo.length();i++){
1943 foligo[i] = toupper(foligo[i]);
1944 if(foligo[i] == 'U') { foligo[i] = 'T'; }
1947 if(type == "PRIMER"){
1952 for(int i=0;i<roligo.length();i++){
1953 roligo[i] = toupper(roligo[i]);
1954 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1956 //roligo = reverseOligo(roligo);
1958 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
1962 // get rest of line in case there is a primer name
1965 if (c == 10 || c == 13 || c == -1){ break; }
1966 else if (c == 32 || c == 9){;} //space or tab
1967 else { group += c; }
1970 oligosPair newPrimer(foligo, roligo);
1972 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1974 //check for repeat barcodes
1975 string tempPair = foligo+roligo;
1976 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1977 else { uniquePrimers.insert(tempPair); }
1979 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1980 primers[indexPrimer]=newPrimer; indexPrimer++;
1981 primerNameVector.push_back(group);
1982 }else if(type == "BARCODE"){
1987 for(int i=0;i<roligo.length();i++){
1988 roligo[i] = toupper(roligo[i]);
1989 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1991 //roligo = reverseOligo(roligo);
1993 oligosPair newPair(foligo, roligo);
1995 if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
2000 if (c == 10 || c == 13 || c == -1){ break; }
2001 else if (c == 32 || c == 9){;} //space or tab
2002 else { group += c; }
2005 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
2007 //check for repeat barcodes
2008 string tempPair = foligo+roligo;
2009 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
2010 else { uniqueBarcodes.insert(tempPair); }
2012 barcodes[indexBarcode]=newPair; indexBarcode++;
2013 barcodeNameVector.push_back(group);
2014 }else if(type == "LINKER"){
2015 linker.push_back(foligo);
2016 m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
2017 }else if(type == "SPACER"){
2018 spacer.push_back(foligo);
2019 m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
2021 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
2027 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
2029 //add in potential combos
2030 if(barcodeNameVector.size() == 0){
2031 oligosPair temp("", "");
2033 barcodeNameVector.push_back("");
2036 if(primerNameVector.size() == 0){
2037 oligosPair temp("", "");
2039 primerNameVector.push_back("");
2042 fastaFileNames.resize(barcodeNameVector.size());
2043 for(int i=0;i<fastaFileNames.size();i++){
2044 fastaFileNames[i].assign(primerNameVector.size(), "");
2048 set<string> uniqueNames; //used to cleanup outputFileNames
2049 for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2050 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2052 string primerName = primerNameVector[itPrimer->first];
2053 string barcodeName = barcodeNameVector[itBar->first];
2055 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
2057 string comboGroupName = "";
2058 string fastaFileName = "";
2059 string qualFileName = "";
2060 string nameFileName = "";
2061 string countFileName = "";
2063 if(primerName == ""){
2064 comboGroupName = barcodeNameVector[itBar->first];
2067 if(barcodeName == ""){
2068 comboGroupName = primerNameVector[itPrimer->first];
2071 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
2077 fastaFileName = rootname + comboGroupName + ".fasta";
2078 if (uniqueNames.count(fastaFileName) == 0) {
2079 outputNames.push_back(fastaFileName);
2080 outputTypes["fasta"].push_back(fastaFileName);
2081 uniqueNames.insert(fastaFileName);
2084 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2085 m->openOutputFile(fastaFileName, temp); temp.close();
2091 bool allBlank = true;
2092 for (int i = 0; i < barcodeNameVector.size(); i++) {
2093 if (barcodeNameVector[i] != "") {
2098 for (int i = 0; i < primerNameVector.size(); i++) {
2099 if (primerNameVector[i] != "") {
2106 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
2114 catch(exception& e) {
2115 m->errorOut(e, "MakeContigsCommand", "getOligos");
2119 //********************************************************************/
2120 string MakeContigsCommand::reverseOligo(string oligo){
2122 string reverse = "";
2124 for(int i=oligo.length()-1;i>=0;i--){
2126 if(oligo[i] == 'A') { reverse += 'T'; }
2127 else if(oligo[i] == 'T'){ reverse += 'A'; }
2128 else if(oligo[i] == 'U'){ reverse += 'A'; }
2130 else if(oligo[i] == 'G'){ reverse += 'C'; }
2131 else if(oligo[i] == 'C'){ reverse += 'G'; }
2133 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2134 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2136 else if(oligo[i] == 'M'){ reverse += 'K'; }
2137 else if(oligo[i] == 'K'){ reverse += 'M'; }
2139 else if(oligo[i] == 'W'){ reverse += 'W'; }
2140 else if(oligo[i] == 'S'){ reverse += 'S'; }
2142 else if(oligo[i] == 'B'){ reverse += 'V'; }
2143 else if(oligo[i] == 'V'){ reverse += 'B'; }
2145 else if(oligo[i] == 'D'){ reverse += 'H'; }
2146 else if(oligo[i] == 'H'){ reverse += 'D'; }
2148 else { reverse += 'N'; }
2154 catch(exception& e) {
2155 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
2159 //**********************************************************************************************************************
2160 vector<int> MakeContigsCommand::convertQual(string qual) {
2162 vector<int> qualScores;
2163 bool negativeScores = false;
2165 for (int i = 0; i < qual.length(); i++) {
2168 temp = int(qual[i]);
2169 if (format == "illumina") {
2170 temp -= 64; //char '@'
2171 }else if (format == "illumina1.8+") {
2172 temp -= int('!'); //char '!'
2173 }else if (format == "solexa") {
2174 temp = int(convertTable[temp]); //convert to sanger
2175 temp -= int('!'); //char '!'
2177 temp -= int('!'); //char '!'
2180 if (temp < -5) { negativeScores = true; }
2181 qualScores.push_back(temp);
2184 if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n"); m->control_pressed = true; }
2188 catch(exception& e) {
2189 m->errorOut(e, "MakeContigsCommand", "convertQual");
2194 //**********************************************************************************************************************