2 * parsefastaqcommand.cpp
5 * Created by westcott on 9/30/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "parsefastaqcommand.h"
11 #include "sequence.hpp"
13 //**********************************************************************************************************************
14 vector<string> ParseFastaQCommand::setParameters(){
16 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
18 CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
19 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
20 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
21 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
22 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
23 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
24 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
25 CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
26 CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
27 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
28 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
29 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31 vector<string> myArray;
32 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
36 m->errorOut(e, "ParseFastaQCommand", "setParameters");
40 //**********************************************************************************************************************
41 string ParseFastaQCommand::getHelpString(){
43 string helpString = "";
44 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
45 helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
46 helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
47 helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
48 helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
49 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
50 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
51 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
52 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
53 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
54 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
55 helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
56 helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
57 helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
58 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
59 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
63 m->errorOut(e, "ParseFastaQCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string ParseFastaQCommand::getOutputPattern(string type) {
72 if (type == "fasta") { pattern = "[filename],fasta"; }
73 else if (type == "qfile") { pattern = "[filename],qual"; }
74 else if (type == "fastq") { pattern = "[filename],[group],fastq"; }
75 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
80 m->errorOut(e, "ParseFastaQCommand", "getOutputPattern");
84 //**********************************************************************************************************************
85 ParseFastaQCommand::ParseFastaQCommand(){
87 abort = true; calledHelp = true;
89 vector<string> tempOutNames;
90 outputTypes["fasta"] = tempOutNames;
91 outputTypes["qfile"] = tempOutNames;
92 outputTypes["fastq"] = tempOutNames;
95 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
99 //**********************************************************************************************************************
100 ParseFastaQCommand::ParseFastaQCommand(string option){
102 abort = false; calledHelp = false;
105 if(option == "help") { help(); abort = true; calledHelp = true; }
106 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109 vector<string> myArray = setParameters();
111 OptionParser parser(option);
112 map<string,string> parameters = parser.getParameters();
114 ValidParameters validParameter;
115 map<string,string>::iterator it;
117 //check to make sure all parameters are valid for command
118 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
119 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
122 //initialize outputTypes
123 vector<string> tempOutNames;
124 outputTypes["fasta"] = tempOutNames;
125 outputTypes["qfile"] = tempOutNames;
126 outputTypes["fastq"] = tempOutNames;
128 //if the user changes the input directory command factory will send this info to us in the output parameter
129 string inputDir = validParameter.validFile(parameters, "inputdir", false);
130 if (inputDir == "not found"){ inputDir = ""; }
133 it = parameters.find("fastq");
134 //user has given a template file
135 if(it != parameters.end()){
136 path = m->hasPath(it->second);
137 //if the user has not given a path then, add inputdir. else leave path alone.
138 if (path == "") { parameters["fastq"] = inputDir + it->second; }
141 it = parameters.find("oligos");
142 //user has given a template file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["oligos"] = inputDir + it->second; }
149 it = parameters.find("group");
150 //user has given a template file
151 if(it != parameters.end()){
152 path = m->hasPath(it->second);
153 //if the user has not given a path then, add inputdir. else leave path alone.
154 if (path == "") { parameters["group"] = inputDir + it->second; }
158 //check for required parameters
159 fastaQFile = validParameter.validFile(parameters, "fastq", true);
160 if (fastaQFile == "not found") { m->mothurOut("fastq is a required parameter for the fastq.info command."); m->mothurOutEndLine(); abort = true; }
161 else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; }
163 oligosfile = validParameter.validFile(parameters, "oligos", true);
164 if (oligosfile == "not found") { oligosfile = ""; }
165 else if (oligosfile == "not open") { oligosfile = ""; abort = true; }
166 else { m->setOligosFile(oligosfile); split = 2; }
168 groupfile = validParameter.validFile(parameters, "group", true);
169 if (groupfile == "not found") { groupfile = ""; }
170 else if (groupfile == "not open") { groupfile = ""; abort = true; }
171 else { m->setGroupFile(groupfile); split = 2; }
173 if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true; }
175 //if the user changes the output directory command factory will send this info to us in the output parameter
176 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastaQFile); }
179 temp = validParameter.validFile(parameters, "fasta", false); if(temp == "not found"){ temp = "T"; }
180 fasta = m->isTrue(temp);
182 temp = validParameter.validFile(parameters, "qfile", false); if(temp == "not found"){ temp = "T"; }
183 qual = m->isTrue(temp);
185 temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; }
186 pacbio = m->isTrue(temp);
188 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
189 m->mothurConvert(temp, bdiffs);
191 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
192 m->mothurConvert(temp, pdiffs);
194 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
195 m->mothurConvert(temp, ldiffs);
197 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
198 m->mothurConvert(temp, sdiffs);
200 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
201 m->mothurConvert(temp, tdiffs);
203 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
206 format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
208 if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) {
209 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
213 if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
217 catch(exception& e) {
218 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
222 //**********************************************************************************************************************
224 int ParseFastaQCommand::execute(){
226 if (abort == true) { if (calledHelp) { return 0; } return 2; }
229 map<string, string> variables;
230 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
231 string fastaFile = getOutputFileName("fasta",variables);
232 string qualFile = getOutputFileName("qfile",variables);
233 ofstream outFasta, outQual;
235 if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
236 if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); }
238 TrimOligos* trimOligos = NULL;
239 int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
240 if (oligosfile != "") {
241 readOligos(oligosfile);
242 numPrimers = primers.size(); numBarcodes = barcodes.size();
243 //find group read belongs to
244 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
245 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
248 else if (groupfile != "") { readGroup(groupfile); }
251 m->openInputFile(fastaQFile, in);
253 //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
254 for (int i = -64; i < 65; i++) {
255 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
256 convertTable.push_back(temp);
263 if (m->control_pressed) { break; }
266 fastqRead2 thisRead = readFastq(in, ignore);
269 vector<int> qualScores;
271 qualScores = convertQual(thisRead.quality);
272 outQual << ">" << thisRead.seq.getName() << endl;
273 for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
277 if (m->control_pressed) { break; }
280 if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
281 string sequence = thisRead.seq.getAligned();
282 for (int i = 0; i < qualScores.size(); i++) {
283 if (qualScores[i] == 0){ sequence[i] = 'N'; }
285 thisRead.seq.setAligned(sequence);
288 //print sequence info to files
289 if (fasta) { thisRead.seq.printSequence(outFasta); }
292 int barcodeIndex, primerIndex, trashCodeLength;
293 if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers); }
294 else if (groupfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode"); }
295 else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
297 if(trashCodeLength == 0){
299 m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
300 out << thisRead.wholeRead;
304 m->openOutputFileAppend(noMatchFile, out);
305 out << thisRead.wholeRead;
310 if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
311 if(count > 100000){ break; }
317 if (fasta) { outFasta.close(); }
318 if (qual) { outQual.close(); }
321 if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
325 if (groupfile != "") { delete groupMap; }
326 else if (oligosfile != "") { delete trimOligos; }
328 map<string, string>::iterator it;
329 set<string> namesToRemove;
330 for(int i=0;i<fastqFileNames.size();i++){
331 for(int j=0;j<fastqFileNames[0].size();j++){
332 if (fastqFileNames[i][j] != "") {
333 if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
334 if(m->isBlank(fastqFileNames[i][j])){
335 m->mothurRemove(fastqFileNames[i][j]);
336 namesToRemove.insert(fastqFileNames[i][j]);
343 //remove names for outputFileNames, just cleans up the output
344 for(int i = 0; i < outputNames.size(); i++) {
345 if (namesToRemove.count(outputNames[i]) != 0) {
346 outputNames.erase(outputNames.begin()+i);
350 if(m->isBlank(noMatchFile)){ m->mothurRemove(noMatchFile); }
351 else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
354 if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
356 //set fasta file as new current fastafile
358 itTypes = outputTypes.find("fasta");
359 if (itTypes != outputTypes.end()) {
360 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
363 itTypes = outputTypes.find("qfile");
364 if (itTypes != outputTypes.end()) {
365 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
368 m->mothurOutEndLine();
369 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
370 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
371 m->mothurOutEndLine();
375 catch(exception& e) {
376 m->errorOut(e, "ParseFastaQCommand", "execute");
380 //**********************************************************************************************************************
381 fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){
384 string wholeRead = "";
387 string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
388 vector<string> pieces = m->splitWhiteSpace(line);
389 string name = ""; if (pieces.size() != 0) { name = pieces[0]; }
390 if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true; }
391 else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
392 else { name = name.substr(1); }
395 string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
396 if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
399 line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
400 pieces = m->splitWhiteSpace(line);
401 string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; }
402 if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
403 else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
404 else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
407 //read quality scores
408 string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
409 if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
411 //sanity check sequence length and number of quality scores match
412 if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
413 if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
416 Sequence seq(name, sequence);
417 fastqRead2 read(seq, quality, wholeRead);
419 if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
423 catch(exception& e) {
424 m->errorOut(e, "ParseFastaQCommand", "readFastq");
429 //**********************************************************************************************************************
430 vector<int> ParseFastaQCommand::convertQual(string qual) {
432 vector<int> qualScores;
434 bool negativeScores = false;
436 for (int i = 0; i < qual.length(); i++) {
440 if (format == "illumina") {
441 temp -= 64; //char '@'
442 }else if (format == "illumina1.8+") {
443 temp -= int('!'); //char '!'
444 }else if (format == "solexa") {
445 temp = int(convertTable[temp]); //convert to sanger
446 temp -= int('!'); //char '!'
448 temp -= int('!'); //char '!'
450 if (temp < -5) { negativeScores = true; }
451 qualScores.push_back(temp);
454 if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n"); m->control_pressed = true; }
458 catch(exception& e) {
459 m->errorOut(e, "ParseFastaQCommand", "convertQual");
463 //**********************************************************************************************************************
464 int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
467 string trashCode = "";
468 int currentSeqsDiffs = 0;
470 Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
471 QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
473 if(linker.size() != 0){
474 success = trimOligos->stripLinker(currSeq, currQual);
475 if(success > ldiffs) { trashCode += 'k'; }
476 else{ currentSeqsDiffs += success; }
480 if(numBarcodes != 0){
481 success = trimOligos->stripBarcode(currSeq, currQual, barcode);
482 if(success > bdiffs) { trashCode += 'b'; }
483 else{ currentSeqsDiffs += success; }
486 if(spacer.size() != 0){
487 success = trimOligos->stripSpacer(currSeq, currQual);
488 if(success > sdiffs) { trashCode += 's'; }
489 else{ currentSeqsDiffs += success; }
494 success = trimOligos->stripForward(currSeq, currQual, primer, true);
495 if(success > pdiffs) { trashCode += 'f'; }
496 else{ currentSeqsDiffs += success; }
499 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
501 if(revPrimer.size() != 0){
502 success = trimOligos->stripReverse(currSeq, currQual);
503 if(!success) { trashCode += 'r'; }
507 return trashCode.length();
509 catch(exception& e) {
510 m->errorOut(e, "ParseFastaQCommand", "findGroup");
514 //**********************************************************************************************************************
515 int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
517 string trashCode = "";
520 string group = groupMap->getGroup(thisRead.seq.getName());
521 if (group == "not found") { trashCode += "g"; } //scrap for group
522 else { //find file group
523 map<string, int>::iterator it = barcodes.find(group);
524 if (it != barcodes.end()) {
525 barcode = it->second;
526 }else { trashCode += "g"; }
529 return trashCode.length();
531 catch(exception& e) {
532 m->errorOut(e, "ParseFastaQCommand", "findGroup");
536 //***************************************************************************************************************
538 bool ParseFastaQCommand::readOligos(string oligoFile){
541 m->openInputFile(oligoFile, inOligos);
543 string type, oligo, roligo, group;
544 bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
547 int indexBarcode = 0;
548 int indexPairedPrimer = 0;
549 int indexPairedBarcode = 0;
550 set<string> uniquePrimers;
551 set<string> uniqueBarcodes;
553 while(!inOligos.eof()){
557 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
560 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
565 //make type case insensitive
566 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
570 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
572 for(int i=0;i<oligo.length();i++){
573 oligo[i] = toupper(oligo[i]);
574 if(oligo[i] == 'U') { oligo[i] = 'T'; }
577 if(type == "FORWARD"){
580 // get rest of line in case there is a primer name
581 while (!inOligos.eof()) {
582 char c = inOligos.get();
583 if (c == 10 || c == 13 || c == -1){ break; }
584 else if (c == 32 || c == 9){;} //space or tab
588 //check for repeat barcodes
589 map<string, int>::iterator itPrime = primers.find(oligo);
590 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
592 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
594 primers[oligo]=indexPrimer; indexPrimer++;
595 primerNameVector.push_back(group);
597 else if (type == "PRIMER"){
602 for(int i=0;i<roligo.length();i++){
603 roligo[i] = toupper(roligo[i]);
604 if(roligo[i] == 'U') { roligo[i] = 'T'; }
606 roligo = reverseOligo(roligo);
610 // get rest of line in case there is a primer name
611 while (!inOligos.eof()) {
612 char c = inOligos.get();
613 if (c == 10 || c == 13 || c == -1){ break; }
614 else if (c == 32 || c == 9){;} //space or tab
618 oligosPair newPrimer(oligo, roligo);
620 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
622 //check for repeat barcodes
623 string tempPair = oligo+roligo;
624 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
625 else { uniquePrimers.insert(tempPair); }
627 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
629 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
630 primerNameVector.push_back(group);
633 else if(type == "REVERSE"){
634 //Sequence oligoRC("reverse", oligo);
635 //oligoRC.reverseComplement();
636 string oligoRC = reverseOligo(oligo);
637 revPrimer.push_back(oligoRC);
639 else if(type == "BARCODE"){
642 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
643 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
646 while (!inOligos.eof()) {
647 char c = inOligos.get();
648 if (c == 10 || c == 13 || c == -1){ break; }
649 else if (c == 32 || c == 9){;} //space or tab
653 //then this is illumina data with 4 columns
655 hasPairedBarcodes = true;
656 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
659 for(int i=0;i<reverseBarcode.length();i++){
660 reverseBarcode[i] = toupper(reverseBarcode[i]);
661 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
664 reverseBarcode = reverseOligo(reverseBarcode);
665 oligosPair newPair(oligo, reverseBarcode);
667 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
668 //check for repeat barcodes
669 string tempPair = oligo+reverseBarcode;
670 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
671 else { uniqueBarcodes.insert(tempPair); }
673 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
674 barcodeNameVector.push_back(group);
676 //check for repeat barcodes
677 map<string, int>::iterator itBar = barcodes.find(oligo);
678 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
680 barcodes[oligo]=indexBarcode; indexBarcode++;
681 barcodeNameVector.push_back(group);
683 }else if(type == "LINKER"){
684 linker.push_back(oligo);
685 }else if(type == "SPACER"){
686 spacer.push_back(oligo);
688 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
694 if (hasPairedBarcodes || hasPrimer) {
696 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
699 //add in potential combos
700 if(barcodeNameVector.size() == 0){
702 barcodeNameVector.push_back("");
705 if(primerNameVector.size() == 0){
707 primerNameVector.push_back("");
710 fastqFileNames.resize(barcodeNameVector.size());
711 for(int i=0;i<fastqFileNames.size();i++){
712 fastqFileNames[i].assign(primerNameVector.size(), "");
716 set<string> uniqueNames; //used to cleanup outputFileNames
718 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
719 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
721 string primerName = primerNameVector[itPrimer->first];
722 string barcodeName = barcodeNameVector[itBar->first];
724 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
726 string comboGroupName = "";
727 string fastqFileName = "";
729 if(primerName == ""){
730 comboGroupName = barcodeNameVector[itBar->first];
733 if(barcodeName == ""){
734 comboGroupName = primerNameVector[itPrimer->first];
737 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
743 map<string, string> variables;
744 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
745 variables["[group]"] = comboGroupName;
746 fastqFileName = getOutputFileName("fastq", variables);
747 if (uniqueNames.count(fastqFileName) == 0) {
748 outputNames.push_back(fastqFileName);
749 outputTypes["fastq"].push_back(fastqFileName);
750 uniqueNames.insert(fastqFileName);
753 fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
754 m->openOutputFile(fastqFileName, temp); temp.close();
760 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
761 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
763 string primerName = primerNameVector[itPrimer->second];
764 string barcodeName = barcodeNameVector[itBar->second];
766 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
768 string comboGroupName = "";
769 string fastqFileName = "";
771 if(primerName == ""){
772 comboGroupName = barcodeNameVector[itBar->second];
775 if(barcodeName == ""){
776 comboGroupName = primerNameVector[itPrimer->second];
779 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
785 map<string, string> variables;
786 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
787 variables["[group]"] = comboGroupName;
788 fastqFileName = getOutputFileName("fastq", variables);
789 if (uniqueNames.count(fastqFileName) == 0) {
790 outputNames.push_back(fastqFileName);
791 outputTypes["fastq"].push_back(fastqFileName);
792 uniqueNames.insert(fastqFileName);
795 fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
796 m->openOutputFile(fastqFileName, temp); temp.close();
804 map<string, string> variables;
805 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
806 variables["[group]"] = "scrap";
807 noMatchFile = getOutputFileName("fastq", variables);
808 m->openOutputFile(noMatchFile, temp); temp.close();
813 catch(exception& e) {
814 m->errorOut(e, "ParseFastaQCommand", "getOligos");
818 //***************************************************************************************************************
819 bool ParseFastaQCommand::readGroup(string groupfile){
821 fastqFileNames.clear();
823 groupMap = new GroupMap();
824 groupMap->readMap(groupfile);
826 //like barcodeNameVector - no primer names
827 vector<string> groups = groupMap->getNamesOfGroups();
829 fastqFileNames.resize(groups.size());
830 for (int i = 0; i < fastqFileNames.size(); i++) {
831 for (int j = 0; j < 1; j++) {
833 map<string, string> variables;
834 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
835 variables["[group]"] = groups[i];
836 string thisFilename = getOutputFileName("fastq",variables);
837 outputNames.push_back(thisFilename);
838 outputTypes["fastq"].push_back(thisFilename);
841 m->openOutputFileBinary(thisFilename, temp); temp.close();
842 fastqFileNames[i].push_back(thisFilename);
843 barcodes[groups[i]] = i;
847 map<string, string> variables;
848 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
849 variables["[group]"] = "scrap";
850 noMatchFile = getOutputFileName("fastq",variables);
851 m->mothurRemove(noMatchFile);
856 catch(exception& e) {
857 m->errorOut(e, "ParseFastaQCommand", "readGroup");
861 //********************************************************************/
862 string ParseFastaQCommand::reverseOligo(string oligo){
866 for(int i=oligo.length()-1;i>=0;i--){
868 if(oligo[i] == 'A') { reverse += 'T'; }
869 else if(oligo[i] == 'T'){ reverse += 'A'; }
870 else if(oligo[i] == 'U'){ reverse += 'A'; }
872 else if(oligo[i] == 'G'){ reverse += 'C'; }
873 else if(oligo[i] == 'C'){ reverse += 'G'; }
875 else if(oligo[i] == 'R'){ reverse += 'Y'; }
876 else if(oligo[i] == 'Y'){ reverse += 'R'; }
878 else if(oligo[i] == 'M'){ reverse += 'K'; }
879 else if(oligo[i] == 'K'){ reverse += 'M'; }
881 else if(oligo[i] == 'W'){ reverse += 'W'; }
882 else if(oligo[i] == 'S'){ reverse += 'S'; }
884 else if(oligo[i] == 'B'){ reverse += 'V'; }
885 else if(oligo[i] == 'V'){ reverse += 'B'; }
887 else if(oligo[i] == 'D'){ reverse += 'H'; }
888 else if(oligo[i] == 'H'){ reverse += 'D'; }
890 else { reverse += 'N'; }
896 catch(exception& e) {
897 m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
903 //**********************************************************************************************************************