]> git.donarmstrong.com Git - mothur.git/blob - parsefastaqcommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / parsefastaqcommand.cpp
1 /*
2  *  parsefastaqcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "parsefastaqcommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ParseFastaQCommand::setParameters(){     
15         try {
16                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
17         CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
18         CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
19         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
20         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
21                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
22         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
23                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
24         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
25                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
26                 CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
27         CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
28                 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
29         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "ParseFastaQCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string ParseFastaQCommand::getHelpString(){     
43         try {
44                 string helpString = "";
45                 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
46                 helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
47         helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
48         helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
49         helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
50         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
51                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
52                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
53         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
54                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
55         helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
56                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
57         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
58         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
59         helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
60                 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
61                 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
62                 return helpString;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "ParseFastaQCommand", "getHelpString");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70 string ParseFastaQCommand::getOutputPattern(string type) {
71     try {
72         string pattern = "";
73         
74         if (type == "fasta") {  pattern = "[filename],fasta"; } 
75         else if (type == "qfile") {  pattern = "[filename],qual"; }
76         else if (type == "fastq") {  pattern = "[filename],[group],fastq"; } 
77         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
78         
79         return pattern;
80     }
81     catch(exception& e) {
82         m->errorOut(e, "ParseFastaQCommand", "getOutputPattern");
83         exit(1);
84     }
85 }
86 //**********************************************************************************************************************
87 ParseFastaQCommand::ParseFastaQCommand(){       
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["fasta"] = tempOutNames;
93                 outputTypes["qfile"] = tempOutNames;
94         outputTypes["fastq"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102 ParseFastaQCommand::ParseFastaQCommand(string option){
103         try {
104                 abort = false; calledHelp = false;
105         split = 1;
106                 
107                 if(option == "help") {  help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string,string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter;
117                         map<string,string>::iterator it;
118
119                         //check to make sure all parameters are valid for command
120                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         //initialize outputTypes
125                         vector<string> tempOutNames;
126                         outputTypes["fasta"] = tempOutNames;
127                         outputTypes["qfile"] = tempOutNames;
128             outputTypes["fastq"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("fastq");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
141                                 }
142                 
143                 it = parameters.find("oligos");
144                                 //user has given a template file
145                                 if(it != parameters.end()){
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
149                                 }
150                 
151                 it = parameters.find("group");
152                                 //user has given a template file
153                                 if(it != parameters.end()){
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
157                                 }
158                         }
159                         
160                         //check for required parameters
161                         fastaQFile = validParameter.validFile(parameters, "fastq", true);
162                         if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
163                         else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }
164             
165             oligosfile = validParameter.validFile(parameters, "oligos", true);
166                         if (oligosfile == "not found") {        oligosfile = "";        }
167                         else if (oligosfile == "not open")      {       oligosfile = ""; abort = true;  }
168             else { m->setOligosFile(oligosfile); split = 2; }
169             
170             groupfile = validParameter.validFile(parameters, "group", true);
171                         if (groupfile == "not found") { groupfile = ""; }
172                         else if (groupfile == "not open")       {       groupfile = ""; abort = true;   }
173             else { m->setGroupFile(groupfile); split = 2; }
174             
175             if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;  }
176                         
177                         //if the user changes the output directory command factory will send this info to us in the output parameter 
178                         outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
179                         
180                         string temp;
181                         temp = validParameter.validFile(parameters, "fasta", false);    if(temp == "not found"){        temp = "T";     }
182                         fasta = m->isTrue(temp); 
183
184                         temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
185                         qual = m->isTrue(temp);
186             
187             temp = validParameter.validFile(parameters, "pacbio", false);       if(temp == "not found"){        temp = "F";     }
188                         pacbio = m->isTrue(temp);
189
190             temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found") { temp = "0"; }
191                         m->mothurConvert(temp, bdiffs);
192                         
193                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
194                         m->mothurConvert(temp, pdiffs);
195             
196             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
197                         m->mothurConvert(temp, ldiffs);
198             
199             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
200                         m->mothurConvert(temp, sdiffs);
201                         
202                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
203                         m->mothurConvert(temp, tdiffs);
204                         
205                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
206
207                         
208             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "sanger";      }
209             
210             if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
211                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
212                                 abort=true;
213                         }
214
215                         if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
216             
217             temp = validParameter.validFile(parameters, "checkorient", false);          if (temp == "not found") { temp = "F"; }
218                         reorient = m->isTrue(temp);
219
220                 }               
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
224                 exit(1);
225         }
226 }
227 //**********************************************************************************************************************
228
229 int ParseFastaQCommand::execute(){
230         try {
231                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
232                 
233                 //open Output Files
234         map<string, string> variables; 
235         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
236                 string fastaFile = getOutputFileName("fasta",variables);
237                 string qualFile = getOutputFileName("qfile",variables);
238                 ofstream outFasta, outQual;
239                 
240                 if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
241                 if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
242         
243         TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;
244         pairedOligos = false; numBarcodes = 0; numPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
245         if (oligosfile != "")       {
246             readOligos(oligosfile);
247             //find group read belongs to
248             if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); numBarcodes = oligos.getPairedBarcodes().size(); numPrimers = oligos.getPairedPrimers().size(); }
249             else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers());  numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size();  }
250             
251             if (reorient) {
252                 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
253             }
254
255         }
256         else if (groupfile != "")   { readGroup(groupfile);     }
257                 
258                 ifstream in;
259                 m->openInputFile(fastaQFile, in);
260         
261         //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
262         for (int i = -64; i < 65; i++) { 
263             char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
264             convertTable.push_back(temp);
265         }
266                 
267         
268         int count = 0;
269                 while (!in.eof()) {
270                         
271                         if (m->control_pressed) { break; }
272             
273             bool ignore;
274             fastqRead2 thisRead = readFastq(in, ignore);
275             
276             if (!ignore) {
277                 vector<int> qualScores;
278                 if (qual) {
279                     qualScores = convertQual(thisRead.quality);
280                     outQual << ">" << thisRead.seq.getName() << endl;
281                     for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
282                     outQual << endl;
283                 }
284                 
285                 if (m->control_pressed) { break; }
286                 
287                 if (pacbio) {
288                     if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
289                     string sequence = thisRead.seq.getAligned();
290                     for (int i = 0; i < qualScores.size(); i++) {
291                         if (qualScores[i] == 0){ sequence[i] = 'N'; }
292                     }
293                     thisRead.seq.setAligned(sequence);
294                 }
295                 
296                 //print sequence info to files
297                 if (fasta) { thisRead.seq.printSequence(outFasta); }
298                 
299                 if (split > 1) {
300                     int barcodeIndex, primerIndex, trashCodeLength;
301                     if (oligosfile != "")      {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, rtrimOligos, numBarcodes, numPrimers);    }
302                     else if (groupfile != "")  {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode");   }
303                     else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
304                     
305                     if(trashCodeLength == 0){
306                         ofstream out;
307                         m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
308                         out << thisRead.wholeRead;
309                         out.close();
310                     }else{
311                         ofstream out;
312                         m->openOutputFileAppend(noMatchFile, out);
313                         out << thisRead.wholeRead;
314                         out.close();
315                     }
316                 }
317                 //report progress
318                 if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
319                 count++;
320                         }
321                 }
322                 
323                 in.close();
324                 if (fasta)      { outFasta.close();     }
325                 if (qual)       { outQual.close();      }
326         
327         //report progress
328                 if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
329         
330         if (split > 1) {
331             
332             if (groupfile != "")        { delete groupMap;      }
333             else if (oligosfile != "")  { delete trimOligos; if (reorient) { delete rtrimOligos; }   }
334            
335                         map<string, string>::iterator it;
336                         set<string> namesToRemove;
337                         for(int i=0;i<fastqFileNames.size();i++){
338                                 for(int j=0;j<fastqFileNames[0].size();j++){
339                                         if (fastqFileNames[i][j] != "") {
340                                                 if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
341                                                         if(m->isBlank(fastqFileNames[i][j])){
342                                                                 m->mothurRemove(fastqFileNames[i][j]);
343                                                                 namesToRemove.insert(fastqFileNames[i][j]);
344                             }
345                                                 }
346                                         }
347                                 }
348                         }
349             
350                         //remove names for outputFileNames, just cleans up the output
351                         for(int i = 0; i < outputNames.size(); i++) {
352                 if (namesToRemove.count(outputNames[i]) != 0) {
353                     outputNames.erase(outputNames.begin()+i);
354                     i--;
355                 }
356             }
357             if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }
358             else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
359         }
360                 
361                 if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
362                 
363                 //set fasta file as new current fastafile
364                 string current = "";
365                 itTypes = outputTypes.find("fasta");
366                 if (itTypes != outputTypes.end()) {
367                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
368                 }
369                 
370                 itTypes = outputTypes.find("qfile");
371                 if (itTypes != outputTypes.end()) {
372                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
373                 }               
374                 
375                 m->mothurOutEndLine();
376                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
377                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
378                 m->mothurOutEndLine();
379
380                 return 0;
381         }
382         catch(exception& e) {
383                 m->errorOut(e, "ParseFastaQCommand", "execute");
384                 exit(1);
385         }
386 }
387 //**********************************************************************************************************************
388 fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){
389     try {
390         ignore = false;
391         string wholeRead = "";
392         
393         //read sequence name
394         string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
395         vector<string> pieces = m->splitWhiteSpace(line);
396         string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
397         if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
398         else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
399         else { name = name.substr(1); }
400         
401         //read sequence
402         string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
403         if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
404         
405         //read sequence name
406         line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
407         pieces = m->splitWhiteSpace(line);
408         string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
409         if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
410         else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
411         else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
412         
413                 
414         //read quality scores
415         string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
416         if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
417         
418         //sanity check sequence length and number of quality scores match
419         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
420         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
421         
422         m->checkName(name);
423         Sequence seq(name, sequence);
424         fastqRead2 read(seq, quality, wholeRead);
425             
426         if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
427         
428         return read;
429     }
430     catch(exception& e) {
431         m->errorOut(e, "ParseFastaQCommand", "readFastq");
432         exit(1);
433     }
434 }
435
436 //**********************************************************************************************************************
437 vector<int> ParseFastaQCommand::convertQual(string qual) {
438         try {
439                 vector<int> qualScores;
440                 
441         bool negativeScores = false;
442         
443                 for (int i = 0; i < qual.length(); i++) { 
444             
445             int temp = 0;
446             temp = int(qual[i]);
447             if (format == "illumina") {
448                 temp -= 64; //char '@'
449             }else if (format == "illumina1.8+") {
450                 temp -= int('!'); //char '!'
451             }else if (format == "solexa") {
452                 temp = int(convertTable[temp]); //convert to sanger
453                 temp -= int('!'); //char '!'
454             }else {
455                 temp -= int('!'); //char '!'
456             }
457             if (temp < -5) { negativeScores = true; }
458                         qualScores.push_back(temp);
459                 }
460                 
461         if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
462         
463                 return qualScores;
464         }
465         catch(exception& e) {
466                 m->errorOut(e, "ParseFastaQCommand", "convertQual");
467                 exit(1);
468         }
469 }
470 //**********************************************************************************************************************
471 int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) {
472         try {
473         int success = 1;
474         string trashCode = "";
475         int currentSeqsDiffs = 0;
476         
477         Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
478         QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
479         
480         //for reorient
481         Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
482         QualityScores savedQual(currQual.getName(), currQual.getScores());
483         
484         if(numLinkers != 0){
485             success = trimOligos->stripLinker(currSeq, currQual);
486             if(success > ldiffs)                {       trashCode += 'k';       }
487             else{ currentSeqsDiffs += success;  }
488             
489         }
490         
491         if(numBarcodes != 0){
492             success = trimOligos->stripBarcode(currSeq, currQual, barcode);
493             if(success > bdiffs)                {       trashCode += 'b';       }
494             else{ currentSeqsDiffs += success;  }
495         }
496         
497         if(numSpacers != 0){
498             success = trimOligos->stripSpacer(currSeq, currQual);
499             if(success > sdiffs)                {       trashCode += 's';       }
500             else{ currentSeqsDiffs += success;  }
501             
502         }
503         
504         if(numPrimers != 0){
505             success = trimOligos->stripForward(currSeq, currQual, primer, true);
506             if(success > pdiffs)                {       trashCode += 'f';       }
507             else{ currentSeqsDiffs += success;  }
508         }
509         
510         if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
511         
512         if(numRPrimers != 0){
513             success = trimOligos->stripReverse(currSeq, currQual);
514             if(!success)                                {       trashCode += 'r';       }
515         }
516         
517         if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
518             int thisSuccess = 0;
519             string thisTrashCode = "";
520             int thisCurrentSeqsDiffs = 0;
521             
522             int thisBarcodeIndex = 0;
523             int thisPrimerIndex = 0;
524             //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
525             if(numBarcodes != 0){
526                 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
527                 if(thisSuccess > bdiffs)                { thisTrashCode += "b"; }
528                 else{ thisCurrentSeqsDiffs += thisSuccess;  }
529             }
530             //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
531             if(numPrimers != 0){
532                 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);
533                 if(thisSuccess > pdiffs)                { thisTrashCode += "f"; }
534                 else{ thisCurrentSeqsDiffs += thisSuccess;  }
535             }
536             
537             if (thisCurrentSeqsDiffs > tdiffs)  {       thisTrashCode += 't';   }
538             
539             if (thisTrashCode == "") {
540                 trashCode = thisTrashCode;
541                 success = thisSuccess;
542                 currentSeqsDiffs = thisCurrentSeqsDiffs;
543                 barcode = thisBarcodeIndex;
544                 primer = thisPrimerIndex;
545                 savedSeq.reverseComplement();
546                 currSeq.setAligned(savedSeq.getAligned());
547                 savedQual.flipQScores();
548                 currQual.setScores(savedQual.getScores());
549             }else { trashCode += "(" + thisTrashCode + ")";  }
550         }
551         
552         if (trashCode.length() == 0) { //is this sequence in the ignore group
553             string thisGroup = oligos.getGroupName(barcode, primer);
554             
555             int pos = thisGroup.find("ignore");
556             if (pos != string::npos) {  trashCode += "i"; }
557         }
558
559         
560         return trashCode.length();
561     }
562         catch(exception& e) {
563                 m->errorOut(e, "ParseFastaQCommand", "findGroup");
564                 exit(1);
565         }
566 }
567 //**********************************************************************************************************************
568 int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
569         try {
570         string trashCode = "";
571         primer = 0;
572         
573         string group = groupMap->getGroup(thisRead.seq.getName());
574         if (group == "not found") {     trashCode += "g";   } //scrap for group
575                 
576         return trashCode.length();
577     }
578         catch(exception& e) {
579                 m->errorOut(e, "ParseFastaQCommand", "findGroup");
580                 exit(1);
581         }
582 }
583 //***************************************************************************************************************
584
585 bool ParseFastaQCommand::readOligos(string oligoFile){
586         try {
587         bool allBlank = false;
588         oligos.read(oligosfile);
589         
590         if (m->control_pressed) { return false; } //error in reading oligos
591         
592         if (oligos.hasPairedBarcodes()) {
593             pairedOligos = true;
594             numPrimers = oligos.getPairedPrimers().size();
595             numBarcodes = oligos.getPairedBarcodes().size();
596         }else {
597             pairedOligos = false;
598             numPrimers = oligos.getPrimers().size();
599             numBarcodes = oligos.getBarcodes().size();
600         }
601         
602         numLinkers = oligos.getLinkers().size();
603         numSpacers = oligos.getSpacers().size();
604         numRPrimers = oligos.getReversePrimers().size();
605         
606         vector<string> groupNames = oligos.getGroupNames();
607         if (groupNames.size() == 0) { allBlank = true;  }
608         
609         
610         fastqFileNames.resize(oligos.getBarcodeNames().size());
611                 for(int i=0;i<fastqFileNames.size();i++){
612             for(int j=0;j<oligos.getPrimerNames().size();j++){  fastqFileNames[i].push_back(""); }
613                 }
614         
615         set<string> uniqueNames; //used to cleanup outputFileNames
616         if (pairedOligos) {
617             map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
618             map<int, oligosPair> primers = oligos.getPairedPrimers();
619             for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
620                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
621                     
622                     string primerName = oligos.getPrimerName(itPrimer->first);
623                     string barcodeName = oligos.getBarcodeName(itBar->first);
624                     
625                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
626                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing
627                     else {
628                         string comboGroupName = "";
629                         string fastaFileName = "";
630                         string qualFileName = "";
631                         string nameFileName = "";
632                         string countFileName = "";
633                         
634                         if(primerName == ""){
635                             comboGroupName = barcodeName;
636                         }else{
637                             if(barcodeName == ""){
638                                 comboGroupName = primerName;
639                             }
640                             else{
641                                 comboGroupName = barcodeName + "." + primerName;
642                             }
643                         }
644                         
645                         ofstream temp;
646                         map<string, string> variables;
647                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
648                         variables["[group]"] = comboGroupName;
649                         string fastqFileName = getOutputFileName("fastq", variables);
650                         if (uniqueNames.count(fastqFileName) == 0) {
651                             outputNames.push_back(fastqFileName);
652                             outputTypes["fastq"].push_back(fastqFileName);
653                             uniqueNames.insert(fastqFileName);
654                         }
655                         
656                         fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
657                         m->openOutputFile(fastqFileName, temp);         temp.close();
658                     }
659                 }
660             }
661         }else {
662             map<string, int> barcodes = oligos.getBarcodes() ;
663             map<string, int> primers = oligos.getPrimers();
664             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
665                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
666                     
667                     string primerName = oligos.getPrimerName(itPrimer->second);
668                     string barcodeName = oligos.getBarcodeName(itBar->second);
669                    
670                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
671                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing
672                     else {
673                         string comboGroupName = "";
674                         string fastaFileName = "";
675                         string qualFileName = "";
676                         string nameFileName = "";
677                         string countFileName = "";
678                         
679                         if(primerName == ""){
680                             comboGroupName = barcodeName;
681                         }else{
682                             if(barcodeName == ""){
683                                 comboGroupName = primerName;
684                             }
685                             else{
686                                 comboGroupName = barcodeName + "." + primerName;
687                             }
688                         }
689                         
690                         ofstream temp;
691                         map<string, string> variables;
692                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
693                         variables["[group]"] = comboGroupName;
694                         string fastqFileName = getOutputFileName("fastq", variables);
695                         if (uniqueNames.count(fastqFileName) == 0) {
696                             outputNames.push_back(fastqFileName);
697                             outputTypes["fastq"].push_back(fastqFileName);
698                             uniqueNames.insert(fastqFileName);
699                         }
700                         
701                         fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
702                         m->openOutputFile(fastqFileName, temp);         temp.close();
703                     }
704                 }
705             }
706         }
707         
708         if (allBlank) {
709             m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
710             return false;
711         }
712        
713         ofstream temp;
714         map<string, string> variables;
715         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
716         variables["[group]"] = "scrap";
717         noMatchFile = getOutputFileName("fastq", variables);
718         m->openOutputFile(noMatchFile, temp);           temp.close();
719        
720                 return true;
721                 
722         }
723         catch(exception& e) {
724                 m->errorOut(e, "ParseFastaQCommand", "getOligos");
725                 exit(1);
726         }
727 }
728 //***************************************************************************************************************
729 bool ParseFastaQCommand::readGroup(string groupfile){
730         try {
731         fastqFileNames.clear();
732         
733         groupMap = new GroupMap();
734         groupMap->readMap(groupfile);
735         
736         //like barcodeNameVector - no primer names
737         vector<string> groups = groupMap->getNamesOfGroups();
738                 
739                 fastqFileNames.resize(groups.size());
740         for (int i = 0; i < fastqFileNames.size(); i++) {
741             for (int j = 0; j < 1; j++) {
742                 
743                 map<string, string> variables;
744                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
745                 variables["[group]"] = groups[i];
746                 string thisFilename = getOutputFileName("fastq",variables);
747                 outputNames.push_back(thisFilename);
748                 outputTypes["fastq"].push_back(thisFilename);
749                 
750                 ofstream temp;
751                 m->openOutputFileBinary(thisFilename, temp); temp.close();
752                 fastqFileNames[i].push_back(thisFilename);
753             }
754         }
755         
756         map<string, string> variables;
757         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
758         variables["[group]"] = "scrap";
759                 noMatchFile = getOutputFileName("fastq",variables);
760         m->mothurRemove(noMatchFile);
761                 
762                 return true;
763                 
764         }
765         catch(exception& e) {
766                 m->errorOut(e, "ParseFastaQCommand", "readGroup");
767                 exit(1);
768         }
769 }
770 //**********************************************************************************************************************
771
772
773