2 // makecontigscommand.cpp
5 // Created by Sarah Westcott on 5/15/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "makecontigscommand.h"
11 //**********************************************************************************************************************
12 vector<string> MakeContigsCommand::setParameters(){
14 CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
15 CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
16 CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
17 CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
18 CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
19 CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
20 CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
21 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
22 CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex);
23 CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
28 CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
29 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
30 CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
31 CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
32 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
33 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
34 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
35 CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
36 CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
37 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
38 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
39 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
40 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
42 vector<string> myArray;
43 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
47 m->errorOut(e, "MakeContigsCommand", "setParameters");
51 //**********************************************************************************************************************
52 string MakeContigsCommand::getHelpString(){
54 string helpString = "";
55 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n";
56 helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
57 helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n";
58 helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
59 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
60 helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
61 helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n";
62 helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n";
63 helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n";
64 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
65 helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process. \n";
66 helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
67 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
68 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70 //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
71 //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
72 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
73 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
74 helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
75 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
76 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n";
77 helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
78 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
79 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
81 helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
82 helpString += "The make.contigs command should be in the following format: \n";
83 helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
84 helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
88 m->errorOut(e, "MakeContigsCommand", "getHelpString");
92 //**********************************************************************************************************************
93 string MakeContigsCommand::getOutputPattern(string type) {
97 if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; }
98 else if (type == "group") { pattern = "[filename],[tag],contigs.groups"; }
99 else if (type == "report") { pattern = "[filename],[tag],contigs.report"; }
100 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
104 catch(exception& e) {
105 m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
109 //**********************************************************************************************************************
110 MakeContigsCommand::MakeContigsCommand(){
112 abort = true; calledHelp = true;
114 vector<string> tempOutNames;
115 outputTypes["fasta"] = tempOutNames;
116 outputTypes["group"] = tempOutNames;
117 outputTypes["report"] = tempOutNames;
119 catch(exception& e) {
120 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
124 //**********************************************************************************************************************
125 MakeContigsCommand::MakeContigsCommand(string option) {
127 abort = false; calledHelp = false;
128 createFileGroup = false; createOligosGroup = false;
130 //allow user to run help
131 if(option == "help") { help(); abort = true; calledHelp = true; }
132 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
135 vector<string> myArray = setParameters();
137 OptionParser parser(option);
138 map<string, string> parameters = parser.getParameters();
140 ValidParameters validParameter("pairwise.seqs");
141 map<string, string>::iterator it;
143 //check to make sure all parameters are valid for command
144 for (it = parameters.begin(); it != parameters.end(); it++) {
145 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
148 //initialize outputTypes
149 vector<string> tempOutNames;
150 outputTypes["fasta"] = tempOutNames;
151 outputTypes["report"] = tempOutNames;
152 outputTypes["group"] = tempOutNames;
155 //if the user changes the input directory command factory will send this info to us in the output parameter
156 string inputDir = validParameter.validFile(parameters, "inputdir", false);
157 if (inputDir == "not found"){ inputDir = ""; }
160 it = parameters.find("ffastq");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["ffastq"] = inputDir + it->second; }
168 it = parameters.find("rfastq");
169 //user has given a template file
170 if(it != parameters.end()){
171 path = m->hasPath(it->second);
172 //if the user has not given a path then, add inputdir. else leave path alone.
173 if (path == "") { parameters["rfastq"] = inputDir + it->second; }
176 it = parameters.find("ffasta");
177 //user has given a template file
178 if(it != parameters.end()){
179 path = m->hasPath(it->second);
180 //if the user has not given a path then, add inputdir. else leave path alone.
181 if (path == "") { parameters["ffasta"] = inputDir + it->second; }
184 it = parameters.find("rfasta");
185 //user has given a template file
186 if(it != parameters.end()){
187 path = m->hasPath(it->second);
188 //if the user has not given a path then, add inputdir. else leave path alone.
189 if (path == "") { parameters["rfasta"] = inputDir + it->second; }
192 it = parameters.find("fqfile");
193 //user has given a template file
194 if(it != parameters.end()){
195 path = m->hasPath(it->second);
196 //if the user has not given a path then, add inputdir. else leave path alone.
197 if (path == "") { parameters["fqfile"] = inputDir + it->second; }
200 it = parameters.find("rqfile");
201 //user has given a template file
202 if(it != parameters.end()){
203 path = m->hasPath(it->second);
204 //if the user has not given a path then, add inputdir. else leave path alone.
205 if (path == "") { parameters["rqfile"] = inputDir + it->second; }
208 it = parameters.find("file");
209 //user has given a template file
210 if(it != parameters.end()){
211 path = m->hasPath(it->second);
212 //if the user has not given a path then, add inputdir. else leave path alone.
213 if (path == "") { parameters["file"] = inputDir + it->second; }
216 it = parameters.find("oligos");
217 //user has given a template file
218 if(it != parameters.end()){
219 path = m->hasPath(it->second);
220 //if the user has not given a path then, add inputdir. else leave path alone.
221 if (path == "") { parameters["oligos"] = inputDir + it->second; }
224 it = parameters.find("findex");
225 //user has given a template file
226 if(it != parameters.end()){
227 path = m->hasPath(it->second);
228 //if the user has not given a path then, add inputdir. else leave path alone.
229 if (path == "") { parameters["findex"] = inputDir + it->second; }
232 it = parameters.find("rindex");
233 //user has given a template file
234 if(it != parameters.end()){
235 path = m->hasPath(it->second);
236 //if the user has not given a path then, add inputdir. else leave path alone.
237 if (path == "") { parameters["rindex"] = inputDir + it->second; }
241 ffastqfile = validParameter.validFile(parameters, "ffastq", true);
242 if (ffastqfile == "not open") { abort = true; }
243 else if (ffastqfile == "not found") { ffastqfile = ""; }
245 rfastqfile = validParameter.validFile(parameters, "rfastq", true);
246 if (rfastqfile == "not open") { abort = true; }
247 else if (rfastqfile == "not found") { rfastqfile = ""; }
249 ffastafile = validParameter.validFile(parameters, "ffasta", true);
250 if (ffastafile == "not open") { abort = true; }
251 else if (ffastafile == "not found") { ffastafile = ""; }
253 rfastafile = validParameter.validFile(parameters, "rfasta", true);
254 if (rfastafile == "not open") { abort = true; }
255 else if (rfastafile == "not found") { rfastafile = ""; }
257 fqualfile = validParameter.validFile(parameters, "fqfile", true);
258 if (fqualfile == "not open") { abort = true; }
259 else if (fqualfile == "not found") { fqualfile = ""; }
261 rqualfile = validParameter.validFile(parameters, "rqfile", true);
262 if (rqualfile == "not open") { abort = true; }
263 else if (rqualfile == "not found") { rqualfile = ""; }
265 file = validParameter.validFile(parameters, "file", true);
266 if (file == "not open") { abort = true; }
267 else if (file == "not found") { file = ""; }
270 if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
271 if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); }
272 if ((ffastqfile != "") && (rfastqfile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the ffastq, you must provide a rfastq file.\n"); }
273 if ((ffastqfile == "") && (rfastqfile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the rfastq, you must provide a ffastq file.\n"); }
274 if ((ffastafile != "") && (rfastafile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the ffasta, you must provide a rfasta file.\n"); }
275 if ((ffastafile == "") && (rfastafile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the rfasta, you must provide a ffasta file.\n"); }
276 if ((fqualfile != "") && (rqualfile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the fqfile, you must provide a rqfile file.\n"); }
277 if ((fqualfile == "") && (rqualfile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile, you must provide a fqfile file.\n"); }
278 if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) {
279 abort = true; m->mothurOut("[ERROR]: If you provide use the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n");
282 oligosfile = validParameter.validFile(parameters, "oligos", true);
283 if (oligosfile == "not found") { oligosfile = ""; }
284 else if(oligosfile == "not open") { abort = true; }
285 else { m->setOligosFile(oligosfile); }
287 findexfile = validParameter.validFile(parameters, "findex", true);
288 if (findexfile == "not found") { findexfile = ""; }
289 else if(findexfile == "not open") { abort = true; }
291 rindexfile = validParameter.validFile(parameters, "rindex", true);
292 if (rindexfile == "not found") { rindexfile = ""; }
293 else if(rindexfile == "not open") { abort = true; }
295 if ((rindexfile != "") || (findexfile != "")) {
296 if (oligosfile == ""){
297 oligosfile = m->getOligosFile();
298 if (oligosfile != "") { m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n"); }
300 m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true;
304 //can only use an index file with the fastq parameters not fasta and qual
305 if ((ffastafile != "") || (rfastafile != "")) {
306 m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true;
310 //if the user changes the output directory command factory will send this info to us in the output parameter
311 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
316 //check for optional parameter and set defaults
317 // ...at some point should added some additional type checking...
319 temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; }
320 m->mothurConvert(temp, match);
322 temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; }
323 m->mothurConvert(temp, misMatch);
324 if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
326 temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; }
327 m->mothurConvert(temp, gapOpen);
328 if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
330 temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; }
331 m->mothurConvert(temp, gapExtend);
332 if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
334 temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; }
335 m->mothurConvert(temp, insert);
336 if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
338 temp = validParameter.validFile(parameters, "deltaq", false); if (temp == "not found"){ temp = "6"; }
339 m->mothurConvert(temp, deltaq);
341 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
342 m->setProcessors(temp);
343 m->mothurConvert(temp, processors);
345 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
346 m->mothurConvert(temp, bdiffs);
348 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
349 m->mothurConvert(temp, pdiffs);
351 // temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
352 // m->mothurConvert(temp, ldiffs);
355 // temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
356 // m->mothurConvert(temp, sdiffs);
359 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
360 m->mothurConvert(temp, tdiffs);
362 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } //+ ldiffs + sdiffs;
364 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
365 allFiles = m->isTrue(temp);
368 temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; }
369 trimOverlap = m->isTrue(temp);
371 align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
372 if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
374 format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "illumina1.8+"; }
376 if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) {
377 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
381 //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
382 for (int i = -64; i < 65; i++) {
383 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
384 convertTable.push_back(temp);
389 catch(exception& e) {
390 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
394 //**********************************************************************************************************************
395 int MakeContigsCommand::execute(){
397 if (abort == true) { if (calledHelp) { return 0; } return 2; }
399 //read ffastq and rfastq files creating fasta and qual files.
400 //this function will create a forward and reverse, fasta and qual files for each processor.
401 //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual. filesToProcess is for each filepair in the file parameter file. for ffastq and rfastq this will be size 1.
402 unsigned long int numReads = 0;
403 int start = time(NULL);
405 m->mothurOut("Reading fastq data...\n");
406 vector < vector< vector<string> > > filesToProcess = preProcessData(numReads);
407 m->mothurOut("Done.\n");
409 if (m->control_pressed) { return 0; }
411 map<string, string> cvars;
412 string compOutputDir = outputDir;
413 if (outputDir == "") { compOutputDir = m->hasPath(file); }
414 cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
416 string compositeGroupFile = getOutputFileName("group",cvars);
417 cvars["[tag]"] = "trim";
418 string compositeFastaFile = getOutputFileName("fasta",cvars);
419 cvars["[tag]"] = "scrap";
420 string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
422 string compositeMisMatchFile = getOutputFileName("report",cvars);
424 if (filesToProcess.size() > 1) { //clear files for append below
425 ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
426 m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
427 m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
428 m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
429 outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
430 outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
431 outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
434 map<string, int> totalGroupCounts;
436 for (int l = 0; l < filesToProcess.size(); l++) {
438 m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
442 vector<vector<string> > fastaFileNames;
443 createOligosGroup = false;
444 string outputGroupFileName;
445 map<string, string> variables;
446 string thisOutputDir = outputDir;
447 if (outputDir == "") { thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
448 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
449 variables["[tag]"] = "";
450 if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); }
451 if (createOligosGroup || createFileGroup) {
452 outputGroupFileName = getOutputFileName("group",variables);
455 //give group in file file precedence
456 if (createFileGroup) { createOligosGroup = false; }
458 variables["[tag]"] = "trim";
459 string outFastaFile = getOutputFileName("fasta",variables);
460 variables["[tag]"] = "scrap";
461 string outScrapFastaFile = getOutputFileName("fasta",variables);
462 variables["[tag]"] = "";
463 string outMisMatchFile = getOutputFileName("report",variables);
465 m->mothurOut("Making contigs...\n");
466 createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
468 //remove temp fasta and qual files
469 for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } }
471 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
474 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
475 map<string, string>::iterator it;
476 set<string> namesToRemove;
477 for(int i=0;i<fastaFileNames.size();i++){
478 for(int j=0;j<fastaFileNames[0].size();j++){
479 if (fastaFileNames[i][j] != "") {
480 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
481 if(m->isBlank(fastaFileNames[i][j])){
482 m->mothurRemove(fastaFileNames[i][j]);
483 namesToRemove.insert(fastaFileNames[i][j]);
485 it = uniqueFastaNames.find(fastaFileNames[i][j]);
486 if (it == uniqueFastaNames.end()) {
487 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
495 //remove names for outputFileNames, just cleans up the output
496 vector<string> outputNames2;
497 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498 outputNames = outputNames2;
500 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
502 m->openInputFile(it->first, in);
505 string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
506 thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
507 m->openOutputFile(thisGroupName, out);
510 if (m->control_pressed) { break; }
512 Sequence currSeq(in); m->gobble(in);
513 out << currSeq.getName() << '\t' << it->second << endl;
520 if (createFileGroup || createOligosGroup) {
522 m->openOutputFile(outputGroupFileName, outGroup);
523 for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
524 outGroup << itGroup->first << '\t' << itGroup->second << endl;
529 if (filesToProcess.size() > 1) { //merge into large combo files
530 if (createFileGroup || createOligosGroup) {
533 m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
534 outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
536 m->appendFiles(outputGroupFileName, compositeGroupFile);
537 if (!allFiles) { m->mothurRemove(outputGroupFileName); }
538 else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
540 for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
541 map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
542 if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
543 else { itTemp->second += itGroups->second; } //existing group, update total
546 if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); }
547 else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); }
548 m->appendFiles(outFastaFile, compositeFastaFile);
549 m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
551 m->mothurRemove(outMisMatchFile);
552 m->mothurRemove(outFastaFile);
553 m->mothurRemove(outScrapFastaFile);
555 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
556 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
557 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
560 totalGroupCounts = groupCounts;
561 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
562 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
563 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
564 if (createFileGroup || createOligosGroup) {
565 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
568 m->mothurOut("Done.\n");
571 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
573 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
575 //output group counts
576 m->mothurOutEndLine();
578 if (totalGroupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
579 for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
580 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
582 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
584 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
586 string currentFasta = "";
587 itTypes = outputTypes.find("fasta");
588 if (itTypes != outputTypes.end()) {
589 if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
592 string currentGroup = "";
593 itTypes = outputTypes.find("group");
594 if (itTypes != outputTypes.end()) {
595 if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
598 //output files created by command
599 m->mothurOutEndLine();
600 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
601 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
602 m->mothurOutEndLine();
606 catch(exception& e) {
607 m->errorOut(e, "MakeContigsCommand", "execute");
611 //**********************************************************************************************************************
612 vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned long int& numReads) {
614 vector< vector< vector<string> > > filesToProcess;
616 if (ffastqfile != "") { //reading one file
617 vector< vector<string> > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile);
618 //adjust for really large processors or really small files
619 if (numReads == 0) { m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
620 if (numReads < processors) {
621 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
622 files.resize(numReads);
623 processors = numReads;
625 filesToProcess.push_back(files);
626 }else if (file != "") { //reading multiple files
627 //return only valid pairs
628 vector< vector<string> > filePairsToProcess = readFileNames(file);
630 if (m->control_pressed) { return filesToProcess; }
632 if (filePairsToProcess.size() != 0) {
633 for (int i = 0; i < filePairsToProcess.size(); i++) {
635 if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } }
637 unsigned long int thisFilesReads;
638 vector< vector<string> > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]);
640 //adjust for really large processors or really small files
641 if (thisFilesReads < processors) {
642 m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n");
643 for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); }
644 //remove from file2Group if necassary
645 map<int, string> cFile2Group;
646 for (map<int, string>::iterator it = file2Group.begin(); it != file2Group.end(); it++) {
647 if ((it->first) < i) { cFile2Group[it->first] = it->second; }
648 else if ((it->first) == i) { } //do nothing, we removed files for i
649 else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed
651 file2Group = cFile2Group;
653 filesToProcess.push_back(files);
654 numReads += thisFilesReads;
658 if (numReads == 0) { m->control_pressed = true; }
660 }else if (ffastafile != "") {
661 vector< vector<string> > files = readFastaFiles(numReads, ffastafile, rfastafile);
662 //adjust for really large processors or really small files
663 if (numReads == 0) { m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; }
664 if (numReads < processors) {
665 for (int i = numReads; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
666 files.resize(numReads);
667 processors = numReads;
669 filesToProcess.push_back(files);
670 }else { m->control_pressed = true; } //should not get here
672 return filesToProcess;
674 catch(exception& e) {
675 m->errorOut(e, "MakeContigsCommand", "preProcessData");
679 //**********************************************************************************************************************
680 int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
683 vector<int> processIDS;
685 map<int, string>::iterator it = file2Group.find(index);
686 if (it != file2Group.end()) { group = it->second; }
688 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
691 //loop through and create all the processes you want
692 while (process != processors-1) {
696 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
699 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
704 for(int i=0;i<tempFASTAFileNames.size();i++){
705 for(int j=0;j<tempFASTAFileNames[i].size();j++){
706 if (tempFASTAFileNames[i][j] != "") {
707 tempFASTAFileNames[i][j] += m->mothurGetpid(process) + ".temp";
708 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
714 num = driver(files[process],
715 outputFasta + m->mothurGetpid(process) + ".temp",
716 outputScrapFasta + m->mothurGetpid(process) + ".temp",
717 outputMisMatches + m->mothurGetpid(process) + ".temp",
718 tempFASTAFileNames, process, group);
720 //pass groupCounts to parent
722 string tempFile = m->mothurGetpid(process) + ".num.temp";
723 m->openOutputFile(tempFile, out);
725 if (createFileGroup || createOligosGroup) {
726 out << groupCounts.size() << endl;
728 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
729 out << it->first << '\t' << it->second << endl;
732 out << groupMap.size() << endl;
733 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
734 out << it->first << '\t' << it->second << endl;
741 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
742 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
748 m->openOutputFile(outputFasta, temp); temp.close();
749 m->openOutputFile(outputScrapFasta, temp); temp.close();
752 num = driver(files[processors-1], outputFasta, outputScrapFasta, outputMisMatches, fastaFileNames, processors-1, group);
754 //force parent to wait until all the processes are done
755 for (int i=0;i<processIDS.size();i++) {
756 int temp = processIDS[i];
760 for (int i = 0; i < processIDS.size(); i++) {
762 string tempFile = toString(processIDS[i]) + ".num.temp";
763 m->openInputFile(tempFile, in);
765 in >> tempNum; num += tempNum; m->gobble(in);
767 if (createFileGroup || createOligosGroup) {
769 in >> tempNum; m->gobble(in);
772 for (int j = 0; j < tempNum; j++) {
774 in >> group >> groupNum; m->gobble(in);
776 map<string, int>::iterator it = groupCounts.find(group);
777 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
778 else { groupCounts[it->first] += groupNum; }
781 in >> tempNum; m->gobble(in);
783 for (int j = 0; j < tempNum; j++) {
784 string group, seqName;
785 in >> seqName >> group; m->gobble(in);
787 map<string, string>::iterator it = groupMap.find(seqName);
788 if (it == groupMap.end()) { groupMap[seqName] = group; }
789 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
793 in.close(); m->mothurRemove(tempFile);
797 //////////////////////////////////////////////////////////////////////////////////////////////////////
798 //Windows version shared memory, so be careful when passing variables through the contigsData struct.
799 //Above fork() will clone, so memory is separate, but that's not the case with windows,
800 //////////////////////////////////////////////////////////////////////////////////////////////////////
802 vector<contigsData*> pDataArray;
803 DWORD dwThreadIdArray[processors-1];
804 HANDLE hThreadArray[processors-1];
806 //Create processor worker threads.
807 for( int h=0; h<processors-1; h++ ){
808 string extension = "";
809 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
810 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
815 for(int i=0;i<tempFASTAFileNames.size();i++){
816 for(int j=0;j<tempFASTAFileNames[i].size();j++){
817 if (tempFASTAFileNames[i][j] != "") {
818 tempFASTAFileNames[i][j] += extension;
819 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
825 contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
826 pDataArray.push_back(tempcontig);
828 hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
831 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
835 string extension = toString(processors-1) + ".temp";
837 for(int i=0;i<tempFASTAFileNames.size();i++){
838 for(int j=0;j<tempFASTAFileNames[i].size();j++){
839 if (tempFASTAFileNames[i][j] != "") {
840 tempFASTAFileNames[i][j] += extension;
841 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
849 m->openOutputFile(outputFasta, temp); temp.close();
850 m->openOutputFile(outputScrapFasta, temp); temp.close();
853 processIDS.push_back(processors-1);
854 num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
856 //Wait until all threads have terminated.
857 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
859 //Close all thread handles and free memory allocations.
860 for(int i=0; i < pDataArray.size(); i++){
861 num += pDataArray[i]->count;
862 if (!pDataArray[i]->done) {
863 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true;
865 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
866 map<string, int>::iterator it2 = groupCounts.find(it->first);
867 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
868 else { groupCounts[it->first] += it->second; }
870 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
871 map<string, string>::iterator it2 = groupMap.find(it->first);
872 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
873 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
875 CloseHandle(hThreadArray[i]);
876 delete pDataArray[i];
881 for (int i = 0; i < processIDS.size(); i++) {
882 m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
883 m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
885 m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
886 m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
888 m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
889 m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
892 for(int j=0;j<fastaFileNames.size();j++){
893 for(int k=0;k<fastaFileNames[j].size();k++){
894 if (fastaFileNames[j][k] != "") {
895 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
896 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
905 catch(exception& e) {
906 m->errorOut(e, "MakeContigsCommand", "createProcesses");
910 //**********************************************************************************************************************
911 int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
914 Alignment* alignment;
915 if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
916 else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
919 string thisffastafile = files[0];
920 string thisfqualfile = files[1];
921 string thisrfastafile = files[2];
922 string thisrqualfile = files[3];
923 string thisfindexfile = files[4];
924 string thisrindexfile = files[5];
926 if (m->debug) { m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
928 ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
929 ofstream outFasta, outMisMatch, outScrapFasta;
930 m->openInputFile(thisffastafile, inFFasta);
931 m->openInputFile(thisrfastafile, inRFasta);
932 if (thisfqualfile != "") {
933 m->openInputFile(thisfqualfile, inFQual);
934 m->openInputFile(thisrqualfile, inRQual);
937 if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex); }
938 if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex); }
940 m->openOutputFile(outputFasta, outFasta);
941 m->openOutputFile(outputScrapFasta, outScrapFasta);
942 m->openOutputFile(outputMisMatches, outMisMatch);
943 outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";
945 TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
947 while ((!inFFasta.eof()) && (!inRFasta.eof())) {
949 if (m->control_pressed) { break; }
952 string trashCode = "";
953 int currentSeqsDiffs = 0;
955 //read seqs and quality info
956 Sequence fSeq(inFFasta); m->gobble(inFFasta);
957 Sequence rSeq(inRFasta); m->gobble(inRFasta);
958 QualityScores* fQual = NULL; QualityScores* rQual = NULL;
959 if (thisfqualfile != "") {
960 fQual = new QualityScores(inFQual); m->gobble(inFQual);
961 rQual = new QualityScores(inRQual); m->gobble(inRQual);
963 Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE");
964 if (thisfindexfile != "") {
965 Sequence temp(inFIndex); m->gobble(inFIndex);
966 findexBarcode.setAligned(temp.getAligned());
969 if (thisrindexfile != "") {
970 Sequence temp(inRIndex); m->gobble(inRIndex);
971 rindexBarcode.setAligned(temp.getAligned());
974 int barcodeIndex = 0;
977 if(barcodes.size() != 0){
978 if (thisfqualfile != "") {
979 if ((thisfindexfile != "") || (thisrindexfile != "")) {
980 success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
982 success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
985 success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
987 if(success > bdiffs) { trashCode += 'b'; }
988 else{ currentSeqsDiffs += success; }
991 if(primers.size() != 0){
992 if (thisfqualfile != "") {
993 success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
995 success = trimOligos.stripForward(fSeq, rSeq, primerIndex);
997 if(success > pdiffs) { trashCode += 'f'; }
998 else{ currentSeqsDiffs += success; }
1001 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
1003 //flip the reverse reads
1004 rSeq.reverseComplement();
1005 if (thisfqualfile != "") { rQual->flipQScores(); }
1008 alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
1009 map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
1010 map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
1011 fSeq.setAligned(alignment->getSeqAAln());
1012 rSeq.setAligned(alignment->getSeqBAln());
1013 int length = fSeq.getAligned().length();
1015 //traverse alignments merging into one contiguous seq
1017 int numMismatches = 0;
1018 string seq1 = fSeq.getAligned();
1019 string seq2 = rSeq.getAligned();
1020 vector<int> scores1, scores2;
1021 if (thisfqualfile != "") {
1022 scores1 = fQual->getQualityScores();
1023 scores2 = rQual->getQualityScores();
1024 delete fQual; delete rQual;
1027 // if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
1028 int overlapStart = fSeq.getStartPos();
1029 int seq2Start = rSeq.getStartPos();
1031 //bigger of the 2 starting positions is the location of the overlapping start
1032 if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
1033 overlapStart = seq2Start;
1034 for (int i = 0; i < overlapStart; i++) { contig += seq1[i]; }
1035 }else { //seq1 starts later so take from 0 to overlapStart from seq2
1036 for (int i = 0; i < overlapStart; i++) { contig += seq2[i]; }
1039 int seq1End = fSeq.getEndPos();
1040 int seq2End = rSeq.getEndPos();
1041 int overlapEnd = seq1End;
1042 if (seq2End < overlapEnd) { overlapEnd = seq2End; } //smallest end position is where overlapping ends
1044 int oStart = contig.length();
1045 //cout << fSeq.getAligned() << endl; cout << rSeq.getAligned() << endl;
1046 for (int i = overlapStart; i < overlapEnd; i++) {
1047 //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl;
1048 if (seq1[i] == seq2[i]) { //match, add base and choose highest score
1050 }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
1051 if (thisfqualfile != "") {
1052 if (scores2[BBaseMap[i]] <= insert) { } //
1053 else { contig += seq2[i]; }
1054 }else { contig += seq2[i]; } //with no quality info, then we keep it?
1055 }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
1056 if (thisfqualfile != "") {
1057 if (scores1[ABaseMap[i]] <= insert) { } //
1058 else { contig += seq1[i]; }
1059 }else { contig += seq1[i]; } //with no quality info, then we keep it?
1060 }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
1061 if (thisfqualfile != "") {
1062 if (ABaseMap[i] <= scores1.size() && BBaseMap[i] <= scores2.size() && // this check is dumb; we should determine this earlier
1063 abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
1065 if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
1067 }else { //if no, base becomes n
1071 }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
1072 }else { //should never get here
1073 m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
1076 int oend = contig.length();
1077 if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
1078 for (int i = overlapEnd; i < length; i++) { contig += seq2[i]; }
1079 }else { //seq2 ends before seq1 so take from overlap to length from seq1
1080 for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; }
1082 //cout << contig << endl;
1084 if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } }
1086 if(trashCode.length() == 0){
1087 bool ignore = false;
1089 if (m->debug) { m->mothurOut(fSeq.getName()); }
1091 if (createOligosGroup) {
1092 if(barcodes.size() != 0){
1093 string thisGroup = barcodeNameVector[barcodeIndex];
1094 if (primers.size() != 0) {
1095 if (primerNameVector[primerIndex] != "") {
1096 if(thisGroup != "") {
1097 thisGroup += "." + primerNameVector[primerIndex];
1099 thisGroup = primerNameVector[primerIndex];
1104 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
1106 int pos = thisGroup.find("ignore");
1107 if (pos == string::npos) {
1108 groupMap[fSeq.getName()] = thisGroup;
1110 map<string, int>::iterator it = groupCounts.find(thisGroup);
1111 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
1112 else { groupCounts[it->first] ++; }
1113 }else { ignore = true; }
1116 }else if (createFileGroup) {
1117 int pos = group.find("ignore");
1118 if (pos == string::npos) {
1119 groupMap[fSeq.getName()] = group;
1121 map<string, int>::iterator it = groupCounts.find(group);
1122 if (it == groupCounts.end()) { groupCounts[group] = 1; }
1123 else { groupCounts[it->first] ++; }
1124 }else { ignore = true; }
1126 if (m->debug) { m->mothurOut("\n"); }
1128 if(allFiles && !ignore){
1130 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
1131 output << ">" << fSeq.getName() << endl << contig << endl;
1136 outFasta << ">" << fSeq.getName() << endl << contig << endl;
1138 for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; } }
1139 outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
1142 outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
1147 if((num) % 1000 == 0){ m->mothurOutJustToScreen(toString(num)); m->mothurOutEndLine(); }
1151 if((num) % 1000 != 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); }
1156 outScrapFasta.close();
1157 outMisMatch.close();
1158 if (thisfqualfile != "") {
1164 if (m->control_pressed) { m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches); }
1168 catch(exception& e) {
1169 m->errorOut(e, "MakeContigsCommand", "driver");
1173 //**********************************************************************************************************************
1174 vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){
1176 vector< vector<string> > files;
1177 //maps processors number to file pointer
1178 map<int, vector<ofstream*> > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse
1179 map<int, vector<ofstream*> >::iterator it;
1181 //create files to write to
1182 for (int i = 0; i < processors; i++) {
1183 vector<ofstream*> temp;
1184 ofstream* outFF = new ofstream; temp.push_back(outFF);
1185 ofstream* outFQ = new ofstream; temp.push_back(outFQ);
1186 ofstream* outRF = new ofstream; temp.push_back(outRF);
1187 ofstream* outRQ = new ofstream; temp.push_back(outRQ);
1188 ofstream* outFI = new ofstream; temp.push_back(outFI);
1189 ofstream* outRI = new ofstream; temp.push_back(outRI);
1190 tempfiles[i] = temp;
1192 vector<string> names;
1193 string thisOutputDir = outputDir;
1194 if (outputDir == "") { thisOutputDir = m->hasPath(ffastq); }
1195 string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "ffastatemp";
1196 string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp";
1197 string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp";
1198 string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp";
1199 string findexfilename = ""; string rindexfilename = "";
1200 noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes
1201 if (findex != "") { findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI); noneOk = true; }
1202 if (rindex != "") { rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp"; m->openOutputFile(rindexfilename, *outRI); noneOk = true; }
1203 names.push_back(ffastafilename); names.push_back(fqualfilename);
1204 names.push_back(rfastafilename); names.push_back(rqualfilename);
1205 names.push_back(findexfilename); names.push_back(rindexfilename);
1206 files.push_back(names);
1208 m->openOutputFile(ffastafilename, *outFF);
1209 m->openOutputFile(rfastafilename, *outRF);
1210 m->openOutputFile(fqualfilename, *outFQ);
1211 m->openOutputFile(rqualfilename, *outRQ);
1214 if (m->control_pressed) {
1215 //close files, delete ofstreams
1216 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1218 for (int i = 0; i < files.size(); i++) {
1219 for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } }
1224 m->openInputFile(ffastq, inForward);
1227 m->openInputFile(rfastq, inReverse);
1229 ifstream infIndex, inrIndex;
1230 bool findexIsGood = false;
1231 bool rindexIsGood = false;
1232 if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; }
1233 if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; }
1236 map<string, fastqRead> uniques;
1237 map<string, fastqRead> iUniques;
1238 map<string, pairFastqRead> pairUniques;
1239 map<string, fastqRead>::iterator itUniques;
1240 while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) {
1242 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1244 //get a read from forward and reverse fastq files
1245 bool ignoref, ignorer, ignorefi, ignoreri;
1246 fastqRead thisFread, thisRread, thisFIread, thisRIread;
1247 if (!inForward.eof()) { thisFread = readFastq(inForward, ignoref); }
1248 else { ignoref = true; }
1249 if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer); }
1250 else { ignorer = true; }
1251 if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } }
1252 else { ignorefi = true; }
1253 if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri); if (inrIndex.eof()) { rindexIsGood = false; } }
1254 else { ignoreri = true; }
1256 bool allowOne = false;
1257 if ((findex == "") || (rindex == "")) { allowOne = true; }
1258 vector<pairFastqRead> frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1259 vector<pairFastqRead> friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne);
1261 //add in index info if provided
1262 vector<pairFastqRead> reads = frReads;
1263 if ((findex != "") || (rindex != "")) { reads = mergeReads(frReads, friReads, pairUniques); }
1265 for (int i = 0; i < reads.size(); i++) {
1266 fastqRead fread = reads[i].forward;
1267 fastqRead rread = reads[i].reverse;
1268 fastqRead firead = reads[i].findex;
1269 fastqRead riread = reads[i].rindex;
1271 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } }
1273 //if (checkReads(fread, rread, ffastq, rfastq)) {
1274 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; }
1276 //if the reads are okay write to output files
1277 int process = count % processors;
1279 *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1280 *(tempfiles[process][1]) << ">" << fread.name << endl;
1281 for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1282 *(tempfiles[process][1]) << endl;
1283 *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1284 *(tempfiles[process][3]) << ">" << rread.name << endl;
1285 for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1286 *(tempfiles[process][3]) << endl;
1287 if (findex != "") { *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; }
1288 if (rindex != "") { *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; }
1293 if((count) % 10000 == 0){ m->mothurOutJustToScreen(toString(count)); m->mothurOutEndLine(); }
1298 if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1300 if (uniques.size() != 0) {
1301 for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1302 if (m->control_pressed) { break; }
1303 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1305 for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
1306 if (m->control_pressed) { break; }
1307 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
1309 m->mothurOutEndLine();
1312 //close files, delete ofstreams
1313 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1316 if (findex != "") { infIndex.close(); }
1317 if (rindex != "") { inrIndex.close(); }
1321 catch(exception& e) {
1322 m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1326 //**********************************************************************************************************************
1327 vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& count, string ffasta, string rfasta){
1329 vector< vector<string> > files;
1330 //maps processors number to file pointer
1331 map<int, vector<ofstream*> > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
1332 map<int, vector<ofstream*> >::iterator it;
1334 //create files to write to
1335 for (int i = 0; i < processors; i++) {
1336 vector<ofstream*> temp;
1337 ofstream* outFF = new ofstream; temp.push_back(outFF);
1338 ofstream* outFQ = new ofstream; temp.push_back(outFQ);
1339 ofstream* outRF = new ofstream; temp.push_back(outRF);
1340 ofstream* outRQ = new ofstream; temp.push_back(outRQ);
1341 tempfiles[i] = temp;
1343 vector<string> names;
1344 string thisOutputDir = outputDir;
1345 if (outputDir == "") { thisOutputDir = m->hasPath(ffasta); }
1346 string ffastafilename = thisOutputDir + m->getRootName(m->getSimpleName(ffasta)) + toString(i) + "ffastatemp";
1347 string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfasta)) + toString(i) + "rfastatemp";
1348 string fqualfilename = "";
1349 if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp"; m->openOutputFile(fqualfilename, *outFQ); }
1350 string rqualfilename = "";
1351 if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); }
1352 string findexfilename = ""; string rindexfilename = "";
1353 names.push_back(ffastafilename); names.push_back(fqualfilename);
1354 names.push_back(rfastafilename); names.push_back(rqualfilename);
1355 names.push_back(findexfilename); names.push_back(rindexfilename);
1356 files.push_back(names);
1358 m->openOutputFile(ffastafilename, *outFF);
1359 m->openOutputFile(rfastafilename, *outRF);
1362 if (m->control_pressed) {
1363 //close files, delete ofstreams
1364 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1366 for (int i = 0; i < files.size(); i++) {
1367 for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
1371 ifstream inForwardFasta;
1372 m->openInputFile(ffasta, inForwardFasta);
1374 ifstream inReverseFasta;
1375 m->openInputFile(rfasta, inReverseFasta);
1377 ifstream inForwardQual, inReverseQual;
1378 if (fqualfile != "") { m->openInputFile(fqualfile, inForwardQual); m->openInputFile(rqualfile, inReverseQual); }
1381 map<string, fastqRead> uniques;
1382 map<string, fastqRead>::iterator itUniques;
1383 while ((!inForwardFasta.eof()) || (!inReverseFasta.eof())) {
1385 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1387 //get a reads from forward and reverse fasta files
1388 bool ignoref, ignorer;
1389 fastqRead thisFread, thisRread;
1390 if (!inForwardFasta.eof()) {
1392 Sequence temp(inForwardFasta); m->gobble(inForwardFasta);
1393 thisFread.name = temp.getName();
1394 thisFread.sequence = temp.getUnaligned();
1395 }else { ignoref = true; }
1396 if (!inReverseFasta.eof()) {
1398 Sequence temp(inReverseFasta); m->gobble(inReverseFasta);
1399 thisRread.name = temp.getName();
1400 thisRread.sequence = temp.getUnaligned();
1401 }else { ignorer = true; }
1403 //get qual reads if given
1404 if (fqualfile != "") {
1405 if (!inForwardQual.eof() && !ignoref) {
1406 QualityScores temp(inForwardQual); m->gobble(inForwardQual);
1407 //if forward files dont match ignore read
1408 if (thisFread.name != temp.getName()) { ignoref = true; }
1409 else { thisFread.scores = temp.getQualityScores(); }
1410 }else { ignoref = true; }
1411 if (!inReverseQual.eof() && !ignorer) {
1412 QualityScores temp(inReverseQual); m->gobble(inReverseQual);
1413 //if reverse files dont match ignore read
1414 if (thisRread.name != temp.getName()) { ignorer = true; }
1415 else { thisRread.scores = temp.getQualityScores(); }
1416 }else { ignorer = true; }
1419 vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false);
1421 for (int i = 0; i < reads.size(); i++) {
1422 fastqRead fread = reads[i].forward;
1423 fastqRead rread = reads[i].reverse;
1425 if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
1427 // if (checkReads(fread, rread, ffasta, rfasta)) {
1428 if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
1430 //if the reads are okay write to output files
1431 int process = count % processors;
1433 *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
1434 *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
1435 if (fqualfile != "") { //if you have quality info, print it
1436 *(tempfiles[process][1]) << ">" << fread.name << endl;
1437 for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
1438 *(tempfiles[process][1]) << endl;
1439 *(tempfiles[process][3]) << ">" << rread.name << endl;
1440 for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
1441 *(tempfiles[process][3]) << endl;
1446 if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1451 if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1453 if (uniques.size() != 0) {
1454 for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
1455 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
1457 m->mothurOutEndLine();
1460 //close files, delete ofstreams
1461 for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } }
1462 inReverseFasta.close();
1463 inForwardFasta.close();
1464 if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); }
1468 catch(exception& e) {
1469 m->errorOut(e, "MakeContigsCommand", "readFastaFiles");
1473 //**********************************************************************************************************************
1474 vector<pairFastqRead> MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool allowOne){
1476 vector<pairFastqRead> reads;
1477 map<string, fastqRead>::iterator itUniques;
1479 if (!ignoref && !ignorer) {
1480 if (forward.name == reverse.name) {
1481 pairFastqRead temp(forward, reverse);
1482 reads.push_back(temp);
1485 //if no match are the names only different by 1 and 2?
1486 string tempFRead = forward.name.substr(0, forward.name.length()-1);
1487 string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
1488 if (tempFRead == tempRRead) {
1489 if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
1490 forward.name = tempFRead;
1491 reverse.name = tempRRead;
1492 pairFastqRead temp(forward, reverse);
1493 reads.push_back(temp);
1499 //look for forward pair
1500 itUniques = uniques.find(forward.name);
1501 if (itUniques != uniques.end()) { //we have the pair for this read
1502 pairFastqRead temp(forward, itUniques->second);
1503 reads.push_back(temp);
1504 uniques.erase(itUniques);
1505 }else { //save this read for later
1506 uniques[forward.name] = forward;
1509 //look for reverse pair
1510 itUniques = uniques.find(reverse.name);
1511 if (itUniques != uniques.end()) { //we have the pair for this read
1512 pairFastqRead temp(itUniques->second, reverse);
1513 reads.push_back(temp);
1514 uniques.erase(itUniques);
1515 }else { //save this read for later
1516 uniques[reverse.name] = reverse;
1521 }else if (!ignoref && ignorer) { //ignore reverse keep forward
1524 pairFastqRead temp(forward, dummy);
1525 reads.push_back(temp);
1527 //look for forward pair
1528 itUniques = uniques.find(forward.name);
1529 if (itUniques != uniques.end()) { //we have the pair for this read
1530 pairFastqRead temp(forward, itUniques->second);
1531 reads.push_back(temp);
1532 uniques.erase(itUniques);
1533 }else { //save this read for later
1534 uniques[forward.name] = forward;
1537 }else if (ignoref && !ignorer) { //ignore forward keep reverse
1540 pairFastqRead temp(dummy, reverse);
1541 reads.push_back(temp);
1543 //look for reverse pair
1544 itUniques = uniques.find(reverse.name);
1545 if (itUniques != uniques.end()) { //we have the pair for this read
1546 pairFastqRead temp(itUniques->second, reverse);
1547 reads.push_back(temp);
1548 uniques.erase(itUniques);
1549 }else { //save this read for later
1550 uniques[reverse.name] = reverse;
1553 }//else ignore both and do nothing
1557 catch(exception& e) {
1558 m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
1562 //**********************************************************************************************************************
1563 //look through the reads from the forward and reverse files and try to find matching reads from index files.
1564 vector<pairFastqRead> MakeContigsCommand::mergeReads(vector<pairFastqRead> thisReads, vector<pairFastqRead> indexes, map<string, pairFastqRead>& uniques){
1566 vector<pairFastqRead> reads;
1567 map<string, pairFastqRead>::iterator itUniques;
1569 set<int> foundIndexes;
1570 for (int i = 0; i < thisReads.size(); i++) {
1572 for (int j = 0; j < indexes.size(); j++) {
1574 //incase only one index
1575 string indexName = indexes[j].forward.name;
1576 if (indexName == "") { indexName = indexes[j].reverse.name; }
1578 if (thisReads[i].forward.name == indexName){
1579 thisReads[i].findex = indexes[j].forward;
1580 thisReads[i].rindex = indexes[j].reverse;
1581 reads.push_back(thisReads[i]);
1583 foundIndexes.insert(j);
1588 //look for forward pair
1589 itUniques = uniques.find('i'+thisReads[i].forward.name);
1590 if (itUniques != uniques.end()) { //we have the pair for this read
1591 thisReads[i].findex = itUniques->second.forward;
1592 thisReads[i].rindex = itUniques->second.reverse;
1593 reads.push_back(thisReads[i]);
1594 uniques.erase(itUniques);
1595 }else { //save this read for later
1596 uniques['r'+thisReads[i].forward.name] = thisReads[i];
1601 if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques
1602 for (int j = 0; j < indexes.size(); j++) {
1603 if (foundIndexes.count(j) == 0) { //we didnt find this one
1604 //incase only one index
1605 string indexName = indexes[j].forward.name;
1606 if (indexName == "") { indexName = indexes[j].reverse.name; }
1608 //look for forward pair
1609 itUniques = uniques.find('r'+indexName);
1610 if (itUniques != uniques.end()) { //we have the pair for this read
1611 pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse);
1612 reads.push_back(temp);
1613 uniques.erase(itUniques);
1614 }else { //save this read for later
1615 uniques['i'+indexName] = indexes[j];
1624 catch(exception& e) {
1625 m->errorOut(e, "MakeContigsCommand", "mergeReads");
1629 //**********************************************************************************************************************
1630 fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
1636 //read sequence name
1637 string line = m->getline(in); m->gobble(in);
1638 vector<string> pieces = m->splitWhiteSpace(line);
1639 string name = ""; if (pieces.size() != 0) { name = pieces[0]; }
1640 if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1641 else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
1642 else { name = name.substr(1); }
1645 string sequence = m->getline(in); m->gobble(in);
1646 if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
1648 //read sequence name
1649 line = m->getline(in); m->gobble(in);
1650 pieces = m->splitWhiteSpace(line);
1651 string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; }
1652 if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
1653 else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
1654 else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
1656 //read quality scores
1657 string quality = m->getline(in); m->gobble(in);
1658 if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
1660 //sanity check sequence length and number of quality scores match
1661 if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
1662 if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
1664 vector<int> qualScores = convertQual(quality);
1668 read.sequence = sequence;
1669 read.scores = qualScores;
1671 if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); }
1675 catch(exception& e) {
1676 m->errorOut(e, "MakeContigsCommand", "readFastq");
1680 /**********************************************************************************************************************
1681 bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
1685 //do sequence lengths match
1686 if (forward.sequence.length() != reverse.sequence.length()) {
1687 m->mothurOut("[WARNING]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfile + ", ignoring.\n");
1691 //do number of qual scores match
1692 if (forward.scores.size() != reverse.scores.size()) {
1693 m->mothurOut("[WARNING]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffile + ", but read " + toString(reverse.scores.size()) + " quality scores from " + rfile + ", ignoring.\n");
1699 catch(exception& e) {
1700 m->errorOut(e, "MakeContigsCommand", "checkReads");
1704 //***************************************************************************************************************
1705 //lines can be 2, 3, or 4 columns
1706 // forward.fastq reverse.fastq -> 2 column
1707 // groupName forward.fastq reverse.fastq -> 3 column
1708 // forward.fastq reverse.fastq forward.index.fastq reverse.index.fastq -> 4 column
1709 // forward.fastq reverse.fastq none reverse.index.fastq -> 4 column
1710 // forward.fastq reverse.fastq forward.index.fastq none -> 4 column
1711 vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
1713 vector< vector<string> > files;
1714 string forward, reverse, findex, rindex;
1717 m->openInputFile(filename, in);
1721 if (m->control_pressed) { return files; }
1723 string line = m->getline(in); m->gobble(in);
1724 vector<string> pieces = m->splitWhiteSpace(line);
1727 if (pieces.size() == 2) {
1728 forward = pieces[0];
1729 reverse = pieces[1];
1733 }else if (pieces.size() == 3) {
1735 forward = pieces[1];
1736 reverse = pieces[2];
1739 createFileGroup = true;
1740 }else if (pieces.size() == 4) {
1741 forward = pieces[0];
1742 reverse = pieces[1];
1745 if ((findex == "none") || (findex == "NONE")){ findex = ""; }
1746 if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; }
1748 m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true;
1751 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
1753 //check to make sure both are able to be opened
1755 int openForward = m->openInputFile(forward, in2, "noerror");
1757 //if you can't open it, try default location
1758 if (openForward == 1) {
1759 if (m->getDefaultPath() != "") { //default path is set
1760 string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
1761 m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
1763 openForward = m->openInputFile(tryPath, in3, "noerror");
1769 //if you can't open it, try output location
1770 if (openForward == 1) {
1771 if (m->getOutputDir() != "") { //default path is set
1772 string tryPath = m->getOutputDir() + m->getSimpleName(forward);
1773 m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1775 openForward = m->openInputFile(tryPath, in4, "noerror");
1781 if (openForward == 1) { //can't find it
1782 m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n");
1783 }else{ in2.close(); }
1786 int openReverse = m->openInputFile(reverse, in3, "noerror");
1788 //if you can't open it, try default location
1789 if (openReverse == 1) {
1790 if (m->getDefaultPath() != "") { //default path is set
1791 string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
1792 m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
1794 openReverse = m->openInputFile(tryPath, in3, "noerror");
1800 //if you can't open it, try output location
1801 if (openReverse == 1) {
1802 if (m->getOutputDir() != "") { //default path is set
1803 string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
1804 m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1806 openReverse = m->openInputFile(tryPath, in4, "noerror");
1812 if (openReverse == 1) { //can't find it
1813 m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n");
1814 }else{ in3.close(); }
1819 openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
1821 //if you can't open it, try default location
1822 if (openFindex == 1) {
1823 if (m->getDefaultPath() != "") { //default path is set
1824 string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
1825 m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
1827 openFindex = m->openInputFile(tryPath, in5, "noerror");
1833 //if you can't open it, try output location
1834 if (openFindex == 1) {
1835 if (m->getOutputDir() != "") { //default path is set
1836 string tryPath = m->getOutputDir() + m->getSimpleName(findex);
1837 m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1839 openFindex = m->openInputFile(tryPath, in6, "noerror");
1845 if (openFindex == 1) { //can't find it
1846 m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
1853 openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
1855 //if you can't open it, try default location
1856 if (openRindex == 1) {
1857 if (m->getDefaultPath() != "") { //default path is set
1858 string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
1859 m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
1861 openRindex = m->openInputFile(tryPath, in8, "noerror");
1867 //if you can't open it, try output location
1868 if (openRindex == 1) {
1869 if (m->getOutputDir() != "") { //default path is set
1870 string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
1871 m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
1873 openRindex = m->openInputFile(tryPath, in9, "noerror");
1879 if (openRindex == 1) { //can't find it
1880 m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
1885 if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
1886 file2Group[files.size()] = group;
1887 vector<string> pair;
1888 pair.push_back(forward);
1889 pair.push_back(reverse);
1890 pair.push_back(findex);
1891 pair.push_back(rindex);
1892 if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true; }
1893 files.push_back(pair);
1900 catch(exception& e) {
1901 m->errorOut(e, "MakeContigsCommand", "readFileNames");
1905 //***************************************************************************************************************
1906 //illumina data requires paired forward and reverse data
1907 //BARCODE atgcatgc atgcatgc groupName
1908 //PRIMER atgcatgc atgcatgc groupName
1909 //PRIMER atgcatgc atgcatgc
1910 bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
1913 m->openInputFile(oligosfile, in);
1917 string type, foligo, roligo, group;
1919 int indexPrimer = 0;
1920 int indexBarcode = 0;
1921 set<string> uniquePrimers;
1922 set<string> uniqueBarcodes;
1928 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1931 while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1936 //make type case insensitive
1937 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1941 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
1943 for(int i=0;i<foligo.length();i++){
1944 foligo[i] = toupper(foligo[i]);
1945 if(foligo[i] == 'U') { foligo[i] = 'T'; }
1948 if(type == "PRIMER"){
1953 for(int i=0;i<roligo.length();i++){
1954 roligo[i] = toupper(roligo[i]);
1955 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1957 //roligo = reverseOligo(roligo);
1959 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
1963 // get rest of line in case there is a primer name
1966 if (c == 10 || c == 13 || c == -1){ break; }
1967 else if (c == 32 || c == 9){;} //space or tab
1968 else { group += c; }
1971 oligosPair newPrimer(foligo, roligo);
1973 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1975 //check for repeat barcodes
1976 string tempPair = foligo+roligo;
1977 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1978 else { uniquePrimers.insert(tempPair); }
1980 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1981 primers[indexPrimer]=newPrimer; indexPrimer++;
1982 primerNameVector.push_back(group);
1983 }else if(type == "BARCODE"){
1988 for(int i=0;i<roligo.length();i++){
1989 roligo[i] = toupper(roligo[i]);
1990 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1992 //roligo = reverseOligo(roligo);
1994 oligosPair newPair(foligo, roligo);
1996 if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
2001 if (c == 10 || c == 13 || c == -1){ break; }
2002 else if (c == 32 || c == 9){;} //space or tab
2003 else { group += c; }
2006 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
2008 //check for repeat barcodes
2009 string tempPair = foligo+roligo;
2010 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
2011 else { uniqueBarcodes.insert(tempPair); }
2013 barcodes[indexBarcode]=newPair; indexBarcode++;
2014 barcodeNameVector.push_back(group);
2015 }else if(type == "LINKER"){
2016 linker.push_back(foligo);
2017 m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
2018 }else if(type == "SPACER"){
2019 spacer.push_back(foligo);
2020 m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
2022 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
2028 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
2030 //add in potential combos
2031 if(barcodeNameVector.size() == 0){
2032 oligosPair temp("", "");
2034 barcodeNameVector.push_back("");
2037 if(primerNameVector.size() == 0){
2038 oligosPair temp("", "");
2040 primerNameVector.push_back("");
2043 fastaFileNames.resize(barcodeNameVector.size());
2044 for(int i=0;i<fastaFileNames.size();i++){
2045 fastaFileNames[i].assign(primerNameVector.size(), "");
2049 set<string> uniqueNames; //used to cleanup outputFileNames
2050 for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
2051 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
2053 string primerName = primerNameVector[itPrimer->first];
2054 string barcodeName = barcodeNameVector[itBar->first];
2056 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
2058 string comboGroupName = "";
2059 string fastaFileName = "";
2060 string qualFileName = "";
2061 string nameFileName = "";
2062 string countFileName = "";
2064 if(primerName == ""){
2065 comboGroupName = barcodeNameVector[itBar->first];
2068 if(barcodeName == ""){
2069 comboGroupName = primerNameVector[itPrimer->first];
2072 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
2078 fastaFileName = rootname + comboGroupName + ".fasta";
2079 if (uniqueNames.count(fastaFileName) == 0) {
2080 outputNames.push_back(fastaFileName);
2081 outputTypes["fasta"].push_back(fastaFileName);
2082 uniqueNames.insert(fastaFileName);
2085 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
2086 m->openOutputFile(fastaFileName, temp); temp.close();
2092 bool allBlank = true;
2093 for (int i = 0; i < barcodeNameVector.size(); i++) {
2094 if (barcodeNameVector[i] != "") {
2099 for (int i = 0; i < primerNameVector.size(); i++) {
2100 if (primerNameVector[i] != "") {
2107 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
2115 catch(exception& e) {
2116 m->errorOut(e, "MakeContigsCommand", "getOligos");
2120 //********************************************************************/
2121 string MakeContigsCommand::reverseOligo(string oligo){
2123 string reverse = "";
2125 for(int i=oligo.length()-1;i>=0;i--){
2127 if(oligo[i] == 'A') { reverse += 'T'; }
2128 else if(oligo[i] == 'T'){ reverse += 'A'; }
2129 else if(oligo[i] == 'U'){ reverse += 'A'; }
2131 else if(oligo[i] == 'G'){ reverse += 'C'; }
2132 else if(oligo[i] == 'C'){ reverse += 'G'; }
2134 else if(oligo[i] == 'R'){ reverse += 'Y'; }
2135 else if(oligo[i] == 'Y'){ reverse += 'R'; }
2137 else if(oligo[i] == 'M'){ reverse += 'K'; }
2138 else if(oligo[i] == 'K'){ reverse += 'M'; }
2140 else if(oligo[i] == 'W'){ reverse += 'W'; }
2141 else if(oligo[i] == 'S'){ reverse += 'S'; }
2143 else if(oligo[i] == 'B'){ reverse += 'V'; }
2144 else if(oligo[i] == 'V'){ reverse += 'B'; }
2146 else if(oligo[i] == 'D'){ reverse += 'H'; }
2147 else if(oligo[i] == 'H'){ reverse += 'D'; }
2149 else { reverse += 'N'; }
2155 catch(exception& e) {
2156 m->errorOut(e, "MakeContigsCommand", "reverseOligo");
2160 //**********************************************************************************************************************
2161 vector<int> MakeContigsCommand::convertQual(string qual) {
2163 vector<int> qualScores;
2164 bool negativeScores = false;
2166 for (int i = 0; i < qual.length(); i++) {
2169 temp = int(qual[i]);
2170 if (format == "illumina") {
2171 temp -= 64; //char '@'
2172 }else if (format == "illumina1.8+") {
2173 temp -= int('!'); //char '!'
2174 }else if (format == "solexa") {
2175 temp = int(convertTable[temp]); //convert to sanger
2176 temp -= int('!'); //char '!'
2178 temp -= int('!'); //char '!'
2181 if (temp < -5) { negativeScores = true; }
2182 qualScores.push_back(temp);
2185 if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n"); m->control_pressed = true; }
2189 catch(exception& e) {
2190 m->errorOut(e, "MakeContigsCommand", "convertQual");
2195 //**********************************************************************************************************************