2 * parsefastaqcommand.cpp
5 * Created by westcott on 9/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "parsefastaqcommand.h"
11 #include "sequence.hpp"
13 //**********************************************************************************************************************
14 vector<string> ParseFastaQCommand::setParameters(){
16 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
18 CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
19 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
20 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
21 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
22 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
23 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
24 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
25 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
26 CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
27 CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
28 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "ParseFastaQCommand", "setParameters");
41 //**********************************************************************************************************************
42 string ParseFastaQCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
46 helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
47 helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
48 helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
49 helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
50 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
51 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
52 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
53 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
54 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
55 helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
56 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
57 helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
58 helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
59 helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
60 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
61 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
65 m->errorOut(e, "ParseFastaQCommand", "getHelpString");
69 //**********************************************************************************************************************
70 string ParseFastaQCommand::getOutputPattern(string type) {
74 if (type == "fasta") { pattern = "[filename],fasta"; }
75 else if (type == "qfile") { pattern = "[filename],qual"; }
76 else if (type == "fastq") { pattern = "[filename],[group],fastq"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
82 m->errorOut(e, "ParseFastaQCommand", "getOutputPattern");
86 //**********************************************************************************************************************
87 ParseFastaQCommand::ParseFastaQCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["fasta"] = tempOutNames;
93 outputTypes["qfile"] = tempOutNames;
94 outputTypes["fastq"] = tempOutNames;
97 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
101 //**********************************************************************************************************************
102 ParseFastaQCommand::ParseFastaQCommand(string option){
104 abort = false; calledHelp = false;
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111 vector<string> myArray = setParameters();
113 OptionParser parser(option);
114 map<string,string> parameters = parser.getParameters();
116 ValidParameters validParameter;
117 map<string,string>::iterator it;
119 //check to make sure all parameters are valid for command
120 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
121 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
124 //initialize outputTypes
125 vector<string> tempOutNames;
126 outputTypes["fasta"] = tempOutNames;
127 outputTypes["qfile"] = tempOutNames;
128 outputTypes["fastq"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("fastq");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["fastq"] = inputDir + it->second; }
143 it = parameters.find("oligos");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["oligos"] = inputDir + it->second; }
151 it = parameters.find("group");
152 //user has given a template file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["group"] = inputDir + it->second; }
160 //check for required parameters
161 fastaQFile = validParameter.validFile(parameters, "fastq", true);
162 if (fastaQFile == "not found") { m->mothurOut("fastq is a required parameter for the fastq.info command."); m->mothurOutEndLine(); abort = true; }
163 else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; }
165 oligosfile = validParameter.validFile(parameters, "oligos", true);
166 if (oligosfile == "not found") { oligosfile = ""; }
167 else if (oligosfile == "not open") { oligosfile = ""; abort = true; }
168 else { m->setOligosFile(oligosfile); split = 2; }
170 groupfile = validParameter.validFile(parameters, "group", true);
171 if (groupfile == "not found") { groupfile = ""; }
172 else if (groupfile == "not open") { groupfile = ""; abort = true; }
173 else { m->setGroupFile(groupfile); split = 2; }
175 if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true; }
177 //if the user changes the output directory command factory will send this info to us in the output parameter
178 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastaQFile); }
181 temp = validParameter.validFile(parameters, "fasta", false); if(temp == "not found"){ temp = "T"; }
182 fasta = m->isTrue(temp);
184 temp = validParameter.validFile(parameters, "qfile", false); if(temp == "not found"){ temp = "T"; }
185 qual = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; }
188 pacbio = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
191 m->mothurConvert(temp, bdiffs);
193 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
194 m->mothurConvert(temp, pdiffs);
196 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
197 m->mothurConvert(temp, ldiffs);
199 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
200 m->mothurConvert(temp, sdiffs);
202 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
203 m->mothurConvert(temp, tdiffs);
205 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
208 format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
210 if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) {
211 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
215 if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
217 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
218 reorient = m->isTrue(temp);
222 catch(exception& e) {
223 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
227 //**********************************************************************************************************************
229 int ParseFastaQCommand::execute(){
231 if (abort == true) { if (calledHelp) { return 0; } return 2; }
234 map<string, string> variables;
235 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
236 string fastaFile = getOutputFileName("fasta",variables);
237 string qualFile = getOutputFileName("qfile",variables);
238 ofstream outFasta, outQual;
240 if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
241 if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); }
243 TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;
244 pairedOligos = false; numBarcodes = 0; numPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
245 if (oligosfile != "") {
246 readOligos(oligosfile);
247 //find group read belongs to
248 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); numBarcodes = oligos.getPairedBarcodes().size(); numPrimers = oligos.getPairedPrimers().size(); }
249 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size(); }
252 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
256 else if (groupfile != "") { readGroup(groupfile); }
259 m->openInputFile(fastaQFile, in);
261 //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
262 for (int i = -64; i < 65; i++) {
263 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
264 convertTable.push_back(temp);
271 if (m->control_pressed) { break; }
274 fastqRead2 thisRead = readFastq(in, ignore);
277 vector<int> qualScores;
279 qualScores = convertQual(thisRead.quality);
280 outQual << ">" << thisRead.seq.getName() << endl;
281 for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
285 if (m->control_pressed) { break; }
288 if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
289 string sequence = thisRead.seq.getAligned();
290 for (int i = 0; i < qualScores.size(); i++) {
291 if (qualScores[i] == 0){ sequence[i] = 'N'; }
293 thisRead.seq.setAligned(sequence);
296 //print sequence info to files
297 if (fasta) { thisRead.seq.printSequence(outFasta); }
300 int barcodeIndex, primerIndex, trashCodeLength;
301 if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, rtrimOligos, numBarcodes, numPrimers); }
302 else if (groupfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode"); }
303 else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
305 if(trashCodeLength == 0){
307 m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
308 out << thisRead.wholeRead;
312 m->openOutputFileAppend(noMatchFile, out);
313 out << thisRead.wholeRead;
318 if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
324 if (fasta) { outFasta.close(); }
325 if (qual) { outQual.close(); }
328 if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
332 if (groupfile != "") { delete groupMap; }
333 else if (oligosfile != "") { delete trimOligos; if (reorient) { delete rtrimOligos; } }
335 map<string, string>::iterator it;
336 set<string> namesToRemove;
337 for(int i=0;i<fastqFileNames.size();i++){
338 for(int j=0;j<fastqFileNames[0].size();j++){
339 if (fastqFileNames[i][j] != "") {
340 if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
341 if(m->isBlank(fastqFileNames[i][j])){
342 m->mothurRemove(fastqFileNames[i][j]);
343 namesToRemove.insert(fastqFileNames[i][j]);
350 //remove names for outputFileNames, just cleans up the output
351 for(int i = 0; i < outputNames.size(); i++) {
352 if (namesToRemove.count(outputNames[i]) != 0) {
353 outputNames.erase(outputNames.begin()+i);
357 if(m->isBlank(noMatchFile)){ m->mothurRemove(noMatchFile); }
358 else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
361 if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
363 //set fasta file as new current fastafile
365 itTypes = outputTypes.find("fasta");
366 if (itTypes != outputTypes.end()) {
367 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
370 itTypes = outputTypes.find("qfile");
371 if (itTypes != outputTypes.end()) {
372 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
375 m->mothurOutEndLine();
376 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
377 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
378 m->mothurOutEndLine();
382 catch(exception& e) {
383 m->errorOut(e, "ParseFastaQCommand", "execute");
387 //**********************************************************************************************************************
388 fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){
391 string wholeRead = "";
394 string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
395 vector<string> pieces = m->splitWhiteSpace(line);
396 string name = ""; if (pieces.size() != 0) { name = pieces[0]; }
397 if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true; }
398 else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
399 else { name = name.substr(1); }
402 string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
403 if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
406 line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
407 pieces = m->splitWhiteSpace(line);
408 string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; }
409 if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
410 else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
411 else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
414 //read quality scores
415 string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
416 if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
418 //sanity check sequence length and number of quality scores match
419 if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
420 if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
423 Sequence seq(name, sequence);
424 fastqRead2 read(seq, quality, wholeRead);
426 if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
430 catch(exception& e) {
431 m->errorOut(e, "ParseFastaQCommand", "readFastq");
436 //**********************************************************************************************************************
437 vector<int> ParseFastaQCommand::convertQual(string qual) {
439 vector<int> qualScores;
441 bool negativeScores = false;
443 for (int i = 0; i < qual.length(); i++) {
447 if (format == "illumina") {
448 temp -= 64; //char '@'
449 }else if (format == "illumina1.8+") {
450 temp -= int('!'); //char '!'
451 }else if (format == "solexa") {
452 temp = int(convertTable[temp]); //convert to sanger
453 temp -= int('!'); //char '!'
455 temp -= int('!'); //char '!'
457 if (temp < -5) { negativeScores = true; }
458 qualScores.push_back(temp);
461 if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n"); m->control_pressed = true; }
465 catch(exception& e) {
466 m->errorOut(e, "ParseFastaQCommand", "convertQual");
470 //**********************************************************************************************************************
471 int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) {
474 string trashCode = "";
475 int currentSeqsDiffs = 0;
477 Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
478 QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
481 Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
482 QualityScores savedQual(currQual.getName(), currQual.getScores());
485 success = trimOligos->stripLinker(currSeq, currQual);
486 if(success > ldiffs) { trashCode += 'k'; }
487 else{ currentSeqsDiffs += success; }
491 if(numBarcodes != 0){
492 success = trimOligos->stripBarcode(currSeq, currQual, barcode);
493 if(success > bdiffs) { trashCode += 'b'; }
494 else{ currentSeqsDiffs += success; }
498 success = trimOligos->stripSpacer(currSeq, currQual);
499 if(success > sdiffs) { trashCode += 's'; }
500 else{ currentSeqsDiffs += success; }
505 success = trimOligos->stripForward(currSeq, currQual, primer, true);
506 if(success > pdiffs) { trashCode += 'f'; }
507 else{ currentSeqsDiffs += success; }
510 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
512 if(numRPrimers != 0){
513 success = trimOligos->stripReverse(currSeq, currQual);
514 if(!success) { trashCode += 'r'; }
517 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
519 string thisTrashCode = "";
520 int thisCurrentSeqsDiffs = 0;
522 int thisBarcodeIndex = 0;
523 int thisPrimerIndex = 0;
524 //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
525 if(numBarcodes != 0){
526 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
527 if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
528 else{ thisCurrentSeqsDiffs += thisSuccess; }
530 //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
532 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);
533 if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
534 else{ thisCurrentSeqsDiffs += thisSuccess; }
537 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
539 if (thisTrashCode == "") {
540 trashCode = thisTrashCode;
541 success = thisSuccess;
542 currentSeqsDiffs = thisCurrentSeqsDiffs;
543 barcode = thisBarcodeIndex;
544 primer = thisPrimerIndex;
545 savedSeq.reverseComplement();
546 currSeq.setAligned(savedSeq.getAligned());
547 savedQual.flipQScores();
548 currQual.setScores(savedQual.getScores());
549 }else { trashCode += "(" + thisTrashCode + ")"; }
552 if (trashCode.length() == 0) { //is this sequence in the ignore group
553 string thisGroup = oligos.getGroupName(barcode, primer);
555 int pos = thisGroup.find("ignore");
556 if (pos != string::npos) { trashCode += "i"; }
560 return trashCode.length();
562 catch(exception& e) {
563 m->errorOut(e, "ParseFastaQCommand", "findGroup");
567 //**********************************************************************************************************************
568 int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
570 string trashCode = "";
573 string group = groupMap->getGroup(thisRead.seq.getName());
574 if (group == "not found") { trashCode += "g"; } //scrap for group
576 return trashCode.length();
578 catch(exception& e) {
579 m->errorOut(e, "ParseFastaQCommand", "findGroup");
583 //***************************************************************************************************************
585 bool ParseFastaQCommand::readOligos(string oligoFile){
587 bool allBlank = false;
588 oligos.read(oligosfile);
590 if (m->control_pressed) { return false; } //error in reading oligos
592 if (oligos.hasPairedBarcodes()) {
594 numPrimers = oligos.getPairedPrimers().size();
595 numBarcodes = oligos.getPairedBarcodes().size();
597 pairedOligos = false;
598 numPrimers = oligos.getPrimers().size();
599 numBarcodes = oligos.getBarcodes().size();
602 numLinkers = oligos.getLinkers().size();
603 numSpacers = oligos.getSpacers().size();
604 numRPrimers = oligos.getReversePrimers().size();
606 vector<string> groupNames = oligos.getGroupNames();
607 if (groupNames.size() == 0) { allBlank = true; }
610 fastqFileNames.resize(oligos.getBarcodeNames().size());
611 for(int i=0;i<fastqFileNames.size();i++){
612 for(int j=0;j<oligos.getPrimerNames().size();j++){ fastqFileNames[i].push_back(""); }
615 set<string> uniqueNames; //used to cleanup outputFileNames
617 map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
618 map<int, oligosPair> primers = oligos.getPairedPrimers();
619 for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
620 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
622 string primerName = oligos.getPrimerName(itPrimer->first);
623 string barcodeName = oligos.getBarcodeName(itBar->first);
625 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
626 else if ((primerName == "") && (barcodeName == "")) { } //do nothing
628 string comboGroupName = "";
629 string fastaFileName = "";
630 string qualFileName = "";
631 string nameFileName = "";
632 string countFileName = "";
634 if(primerName == ""){
635 comboGroupName = barcodeName;
637 if(barcodeName == ""){
638 comboGroupName = primerName;
641 comboGroupName = barcodeName + "." + primerName;
646 map<string, string> variables;
647 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
648 variables["[group]"] = comboGroupName;
649 string fastqFileName = getOutputFileName("fastq", variables);
650 if (uniqueNames.count(fastqFileName) == 0) {
651 outputNames.push_back(fastqFileName);
652 outputTypes["fastq"].push_back(fastqFileName);
653 uniqueNames.insert(fastqFileName);
656 fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
657 m->openOutputFile(fastqFileName, temp); temp.close();
662 map<string, int> barcodes = oligos.getBarcodes() ;
663 map<string, int> primers = oligos.getPrimers();
664 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
665 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
667 string primerName = oligos.getPrimerName(itPrimer->second);
668 string barcodeName = oligos.getBarcodeName(itBar->second);
670 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
671 else if ((primerName == "") && (barcodeName == "")) { } //do nothing
673 string comboGroupName = "";
674 string fastaFileName = "";
675 string qualFileName = "";
676 string nameFileName = "";
677 string countFileName = "";
679 if(primerName == ""){
680 comboGroupName = barcodeName;
682 if(barcodeName == ""){
683 comboGroupName = primerName;
686 comboGroupName = barcodeName + "." + primerName;
691 map<string, string> variables;
692 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
693 variables["[group]"] = comboGroupName;
694 string fastqFileName = getOutputFileName("fastq", variables);
695 if (uniqueNames.count(fastqFileName) == 0) {
696 outputNames.push_back(fastqFileName);
697 outputTypes["fastq"].push_back(fastqFileName);
698 uniqueNames.insert(fastqFileName);
701 fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
702 m->openOutputFile(fastqFileName, temp); temp.close();
709 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
714 map<string, string> variables;
715 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
716 variables["[group]"] = "scrap";
717 noMatchFile = getOutputFileName("fastq", variables);
718 m->openOutputFile(noMatchFile, temp); temp.close();
723 catch(exception& e) {
724 m->errorOut(e, "ParseFastaQCommand", "getOligos");
728 //***************************************************************************************************************
729 bool ParseFastaQCommand::readGroup(string groupfile){
731 fastqFileNames.clear();
733 groupMap = new GroupMap();
734 groupMap->readMap(groupfile);
736 //like barcodeNameVector - no primer names
737 vector<string> groups = groupMap->getNamesOfGroups();
739 fastqFileNames.resize(groups.size());
740 for (int i = 0; i < fastqFileNames.size(); i++) {
741 for (int j = 0; j < 1; j++) {
743 map<string, string> variables;
744 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
745 variables["[group]"] = groups[i];
746 string thisFilename = getOutputFileName("fastq",variables);
747 outputNames.push_back(thisFilename);
748 outputTypes["fastq"].push_back(thisFilename);
751 m->openOutputFileBinary(thisFilename, temp); temp.close();
752 fastqFileNames[i].push_back(thisFilename);
756 map<string, string> variables;
757 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
758 variables["[group]"] = "scrap";
759 noMatchFile = getOutputFileName("fastq",variables);
760 m->mothurRemove(noMatchFile);
765 catch(exception& e) {
766 m->errorOut(e, "ParseFastaQCommand", "readGroup");
770 //**********************************************************************************************************************