5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "TrimSeqsCommand", "setParameters");
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62 helpString += "The fasta parameter is required.\n";
63 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75 helpString += "The qfile parameter allows you to provide a quality file.\n";
76 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87 helpString += "The trim.seqs command should be in the following format: \n";
88 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
89 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
90 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
102 //**********************************************************************************************************************
104 TrimSeqsCommand::TrimSeqsCommand(){
106 abort = true; calledHelp = true;
108 vector<string> tempOutNames;
109 outputTypes["fasta"] = tempOutNames;
110 outputTypes["qfile"] = tempOutNames;
111 outputTypes["group"] = tempOutNames;
112 outputTypes["name"] = tempOutNames;
114 catch(exception& e) {
115 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
119 //***************************************************************************************************************
121 TrimSeqsCommand::TrimSeqsCommand(string option) {
124 abort = false; calledHelp = false;
127 //allow user to run help
128 if(option == "help") { help(); abort = true; calledHelp = true; }
129 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
132 vector<string> myArray = setParameters();
134 OptionParser parser(option);
135 map<string,string> parameters = parser.getParameters();
137 ValidParameters validParameter;
138 map<string,string>::iterator it;
140 //check to make sure all parameters are valid for command
141 for (it = parameters.begin(); it != parameters.end(); it++) {
142 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
145 //initialize outputTypes
146 vector<string> tempOutNames;
147 outputTypes["fasta"] = tempOutNames;
148 outputTypes["qfile"] = tempOutNames;
149 outputTypes["group"] = tempOutNames;
150 outputTypes["name"] = tempOutNames;
152 //if the user changes the input directory command factory will send this info to us in the output parameter
153 string inputDir = validParameter.validFile(parameters, "inputdir", false);
154 if (inputDir == "not found"){ inputDir = ""; }
157 it = parameters.find("fasta");
158 //user has given a template file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["fasta"] = inputDir + it->second; }
165 it = parameters.find("oligos");
166 //user has given a template file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["oligos"] = inputDir + it->second; }
173 it = parameters.find("qfile");
174 //user has given a template file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["qfile"] = inputDir + it->second; }
181 it = parameters.find("name");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["name"] = inputDir + it->second; }
192 //check for required parameters
193 fastaFile = validParameter.validFile(parameters, "fasta", true);
194 if (fastaFile == "not found") {
195 fastaFile = m->getFastaFile();
196 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198 }else if (fastaFile == "not open") { abort = true; }
199 else { m->setFastaFile(fastaFile); }
201 //if the user changes the output directory command factory will send this info to us in the output parameter
202 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
204 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
211 temp = validParameter.validFile(parameters, "flip", false);
212 if (temp == "not found") { flip = 0; }
213 else { flip = m->isTrue(temp); }
215 temp = validParameter.validFile(parameters, "oligos", true);
216 if (temp == "not found"){ oligoFile = ""; }
217 else if(temp == "not open"){ abort = true; }
218 else { oligoFile = temp; m->setOligosFile(oligoFile); }
221 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
222 m->mothurConvert(temp, maxAmbig);
224 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
225 m->mothurConvert(temp, maxHomoP);
227 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
228 m->mothurConvert(temp, minLength);
230 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
231 m->mothurConvert(temp, maxLength);
233 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
234 m->mothurConvert(temp, bdiffs);
236 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
237 m->mothurConvert(temp, pdiffs);
239 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
240 m->mothurConvert(temp, ldiffs);
242 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
243 m->mothurConvert(temp, sdiffs);
245 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
246 m->mothurConvert(temp, tdiffs);
248 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
250 temp = validParameter.validFile(parameters, "qfile", true);
251 if (temp == "not found") { qFileName = ""; }
252 else if(temp == "not open") { abort = true; }
253 else { qFileName = temp; m->setQualFile(qFileName); }
255 temp = validParameter.validFile(parameters, "name", true);
256 if (temp == "not found") { nameFile = ""; }
257 else if(temp == "not open") { nameFile = ""; abort = true; }
258 else { nameFile = temp; m->setNameFile(nameFile); }
260 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
261 m->mothurConvert(temp, qThreshold);
263 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
264 qtrim = m->isTrue(temp);
266 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
267 convert(temp, qRollAverage);
269 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270 convert(temp, qWindowAverage);
272 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
273 convert(temp, qWindowSize);
275 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
276 convert(temp, qWindowStep);
278 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
279 convert(temp, qAverage);
281 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
282 convert(temp, keepFirst);
284 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
285 convert(temp, removeLast);
287 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
288 allFiles = m->isTrue(temp);
290 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
291 keepforward = m->isTrue(temp);
293 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
294 m->setProcessors(temp);
295 m->mothurConvert(temp, processors);
298 if(allFiles && (oligoFile == "")){
299 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
301 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
306 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
307 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
311 if (nameFile == "") {
312 vector<string> files; files.push_back(fastaFile);
313 parser.getNameFile(files);
318 catch(exception& e) {
319 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
323 //***************************************************************************************************************
325 int TrimSeqsCommand::execute(){
328 if (abort == true) { if (calledHelp) { return 0; } return 2; }
330 numFPrimers = 0; //this needs to be initialized
333 vector<vector<string> > fastaFileNames;
334 vector<vector<string> > qualFileNames;
335 vector<vector<string> > nameFileNames;
337 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
338 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
340 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
341 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
343 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
344 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
346 if (qFileName != "") {
347 outputNames.push_back(trimQualFile);
348 outputNames.push_back(scrapQualFile);
349 outputTypes["qfile"].push_back(trimQualFile);
350 outputTypes["qfile"].push_back(scrapQualFile);
353 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
354 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
356 if (nameFile != "") {
357 m->readNames(nameFile, nameMap);
358 outputNames.push_back(trimNameFile);
359 outputNames.push_back(scrapNameFile);
360 outputTypes["name"].push_back(trimNameFile);
361 outputTypes["name"].push_back(scrapNameFile);
364 if (m->control_pressed) { return 0; }
366 string outputGroupFileName;
368 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
370 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
371 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
375 //fills lines and qlines
376 setLines(fastaFile, qFileName);
378 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
380 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
382 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
385 // driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
388 if (m->control_pressed) { return 0; }
391 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
392 map<string, string>::iterator it;
393 set<string> namesToRemove;
394 for(int i=0;i<fastaFileNames.size();i++){
395 for(int j=0;j<fastaFileNames[0].size();j++){
396 if (fastaFileNames[i][j] != "") {
397 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
398 if(m->isBlank(fastaFileNames[i][j])){
399 m->mothurRemove(fastaFileNames[i][j]);
400 namesToRemove.insert(fastaFileNames[i][j]);
403 m->mothurRemove(qualFileNames[i][j]);
404 namesToRemove.insert(qualFileNames[i][j]);
408 m->mothurRemove(nameFileNames[i][j]);
409 namesToRemove.insert(nameFileNames[i][j]);
412 it = uniqueFastaNames.find(fastaFileNames[i][j]);
413 if (it == uniqueFastaNames.end()) {
414 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
422 //remove names for outputFileNames, just cleans up the output
423 vector<string> outputNames2;
424 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
425 outputNames = outputNames2;
427 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
429 m->openInputFile(it->first, in);
432 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
433 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
434 m->openOutputFile(thisGroupName, out);
437 if (m->control_pressed) { break; }
439 Sequence currSeq(in); m->gobble(in);
440 out << currSeq.getName() << '\t' << it->second << endl;
447 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
449 //output group counts
450 m->mothurOutEndLine();
452 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
453 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
454 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
456 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
458 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
460 //set fasta file as new current fastafile
462 itTypes = outputTypes.find("fasta");
463 if (itTypes != outputTypes.end()) {
464 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
467 itTypes = outputTypes.find("name");
468 if (itTypes != outputTypes.end()) {
469 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
472 itTypes = outputTypes.find("qfile");
473 if (itTypes != outputTypes.end()) {
474 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
477 itTypes = outputTypes.find("group");
478 if (itTypes != outputTypes.end()) {
479 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
482 m->mothurOutEndLine();
483 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
484 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
485 m->mothurOutEndLine();
490 catch(exception& e) {
491 m->errorOut(e, "TrimSeqsCommand", "execute");
496 /**************************************************************************************/
498 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
502 ofstream trimFASTAFile;
503 m->openOutputFile(trimFileName, trimFASTAFile);
505 ofstream scrapFASTAFile;
506 m->openOutputFile(scrapFileName, scrapFASTAFile);
508 ofstream trimQualFile;
509 ofstream scrapQualFile;
511 m->openOutputFile(trimQFileName, trimQualFile);
512 m->openOutputFile(scrapQFileName, scrapQualFile);
515 ofstream trimNameFile;
516 ofstream scrapNameFile;
518 m->openOutputFile(trimNFileName, trimNameFile);
519 m->openOutputFile(scrapNFileName, scrapNameFile);
523 ofstream outGroupsFile;
524 if (createGroup){ m->openOutputFile(groupFileName, outGroupsFile); }
526 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
527 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
528 if (fastaFileNames[i][j] != "") {
530 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
532 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
536 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
544 m->openInputFile(filename, inFASTA);
545 inFASTA.seekg(line.start);
548 if(qFileName != "") {
549 m->openInputFile(qFileName, qFile);
550 qFile.seekg(qline.start);
555 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
559 if (m->control_pressed) {
560 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
561 if (createGroup) { outGroupsFile.close(); }
566 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
572 string trashCode = "";
573 int currentSeqsDiffs = 0;
575 Sequence currSeq(inFASTA); m->gobble(inFASTA);
576 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
577 QualityScores currQual;
579 currQual = QualityScores(qFile); m->gobble(qFile);
582 string origSeq = currSeq.getUnaligned();
585 int barcodeIndex = 0;
589 success = trimOligos.stripLinker(currSeq, currQual);
590 if(success > ldiffs) { trashCode += 'k'; }
591 else{ currentSeqsDiffs += success; }
595 if(barcodes.size() != 0){
596 success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
597 if(success > bdiffs) { trashCode += 'b'; }
598 else{ currentSeqsDiffs += success; }
602 success = trimOligos.stripSpacer(currSeq, currQual);
603 if(success > sdiffs) { trashCode += 's'; }
604 else{ currentSeqsDiffs += success; }
608 if(numFPrimers != 0){
609 success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
610 if(success > pdiffs) { trashCode += 'f'; }
611 else{ currentSeqsDiffs += success; }
614 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
616 if(numRPrimers != 0){
617 success = trimOligos.stripReverse(currSeq, currQual);
618 if(!success) { trashCode += 'r'; }
622 success = keepFirstTrim(currSeq, currQual);
626 success = removeLastTrim(currSeq, currQual);
627 if(!success) { trashCode += 'l'; }
632 int origLength = currSeq.getNumBases();
634 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
635 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
636 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
637 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
638 else { success = 1; }
640 //you don't want to trim, if it fails above then scrap it
641 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
643 if(!success) { trashCode += 'q'; }
646 if(minLength > 0 || maxLength > 0){
647 success = cullLength(currSeq);
648 if(!success) { trashCode += 'l'; }
651 success = cullHomoP(currSeq);
652 if(!success) { trashCode += 'h'; }
655 success = cullAmbigs(currSeq);
656 if(!success) { trashCode += 'n'; }
659 if(flip){ // should go last
660 currSeq.reverseComplement();
662 currQual.flipQScores();
666 if(trashCode.length() == 0){
667 currSeq.setAligned(currSeq.getUnaligned());
668 currSeq.printSequence(trimFASTAFile);
671 currQual.printQScores(trimQualFile);
675 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
676 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
677 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
681 if(barcodes.size() != 0){
682 string thisGroup = barcodeNameVector[barcodeIndex];
683 if (primers.size() != 0) {
684 if (primerNameVector[primerIndex] != "") {
685 if(thisGroup != "") {
686 thisGroup += "." + primerNameVector[primerIndex];
688 thisGroup = primerNameVector[primerIndex];
693 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
695 if (nameFile != "") {
696 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
697 if (itName != nameMap.end()) {
698 vector<string> thisSeqsNames;
699 m->splitAtChar(itName->second, thisSeqsNames, ',');
700 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
701 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
703 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
706 map<string, int>::iterator it = groupCounts.find(thisGroup);
707 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
708 else { groupCounts[it->first]++; }
715 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
716 currSeq.printSequence(output);
720 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
721 currQual.printQScores(output);
726 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
727 if (itName != nameMap.end()) {
728 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
729 output << itName->first << '\t' << itName->second << endl;
731 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
736 if(nameFile != ""){ //needs to be before the currSeq name is changed
737 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
738 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
739 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
741 currSeq.setName(currSeq.getName() + '|' + trashCode);
742 currSeq.setUnaligned(origSeq);
743 currSeq.setAligned(origSeq);
744 currSeq.printSequence(scrapFASTAFile);
746 currQual.printQScores(scrapQualFile);
752 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
753 unsigned long long pos = inFASTA.tellg();
754 if ((pos == -1) || (pos >= line.end)) { break; }
757 if (inFASTA.eof()) { break; }
761 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
765 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
769 trimFASTAFile.close();
770 scrapFASTAFile.close();
771 if (createGroup) { outGroupsFile.close(); }
772 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
773 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
777 catch(exception& e) {
778 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
783 /**************************************************************************************************/
785 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
792 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
793 //loop through and create all the processes you want
794 while (process != processors) {
798 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
802 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
803 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
804 vector<vector<string> > tempNameFileNames = nameFileNames;
809 for(int i=0;i<tempFASTAFileNames.size();i++){
810 for(int j=0;j<tempFASTAFileNames[i].size();j++){
811 if (tempFASTAFileNames[i][j] != "") {
812 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
813 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
816 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
817 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
820 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
821 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
828 driverCreateTrim(filename,
830 (trimFASTAFileName + toString(getpid()) + ".temp"),
831 (scrapFASTAFileName + toString(getpid()) + ".temp"),
832 (trimQualFileName + toString(getpid()) + ".temp"),
833 (scrapQualFileName + toString(getpid()) + ".temp"),
834 (trimNameFileName + toString(getpid()) + ".temp"),
835 (scrapNameFileName + toString(getpid()) + ".temp"),
836 (groupFile + toString(getpid()) + ".temp"),
838 tempPrimerQualFileNames,
843 //pass groupCounts to parent
846 string tempFile = filename + toString(getpid()) + ".num.temp";
847 m->openOutputFile(tempFile, out);
849 out << groupCounts.size() << endl;
851 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
852 out << it->first << '\t' << it->second << endl;
858 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
859 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
866 m->openOutputFile(trimFASTAFileName, temp); temp.close();
867 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
869 m->openOutputFile(trimQualFileName, temp); temp.close();
870 m->openOutputFile(scrapQualFileName, temp); temp.close();
872 if (nameFile != "") {
873 m->openOutputFile(trimNameFileName, temp); temp.close();
874 m->openOutputFile(scrapNameFileName, temp); temp.close();
877 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
879 //force parent to wait until all the processes are done
880 for (int i=0;i<processIDS.size();i++) {
881 int temp = processIDS[i];
885 //////////////////////////////////////////////////////////////////////////////////////////////////////
886 //Windows version shared memory, so be careful when passing variables through the trimData struct.
887 //Above fork() will clone, so memory is separate, but that's not the case with windows,
888 //////////////////////////////////////////////////////////////////////////////////////////////////////
890 vector<trimData*> pDataArray;
891 DWORD dwThreadIdArray[processors-1];
892 HANDLE hThreadArray[processors-1];
894 //Create processor worker threads.
895 for( int i=0; i<processors-1; i++){
897 string extension = "";
898 if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
899 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
900 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
901 vector<vector<string> > tempNameFileNames = nameFileNames;
906 for(int i=0;i<tempFASTAFileNames.size();i++){
907 for(int j=0;j<tempFASTAFileNames[i].size();j++){
908 if (tempFASTAFileNames[i][j] != "") {
909 tempFASTAFileNames[i][j] += extension;
910 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
913 tempPrimerQualFileNames[i][j] += extension;
914 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
917 tempNameFileNames[i][j] += extension;
918 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
926 trimData* tempTrim = new trimData(filename,
928 (trimFASTAFileName+extension),
929 (scrapFASTAFileName+extension),
930 (trimQualFileName+extension),
931 (scrapQualFileName+extension),
932 (trimNameFileName+extension),
933 (scrapNameFileName+extension),
934 (groupFile+extension),
936 tempPrimerQualFileNames,
938 lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
939 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer,
940 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
941 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
942 minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
943 pDataArray.push_back(tempTrim);
945 hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
950 m->openOutputFile(trimFASTAFileName, temp); temp.close();
951 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
953 m->openOutputFile(trimQualFileName, temp); temp.close();
954 m->openOutputFile(scrapQualFileName, temp); temp.close();
956 if (nameFile != "") {
957 m->openOutputFile(trimNameFileName, temp); temp.close();
958 m->openOutputFile(scrapNameFileName, temp); temp.close();
961 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
962 processIDS.push_back(processors-1);
965 //Wait until all threads have terminated.
966 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
968 //Close all thread handles and free memory allocations.
969 for(int i=0; i < pDataArray.size(); i++){
970 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
971 map<string, int>::iterator it2 = groupCounts.find(it->first);
972 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
973 else { groupCounts[it->first] += it->second; }
975 CloseHandle(hThreadArray[i]);
976 delete pDataArray[i];
983 for(int i=0;i<processIDS.size();i++){
985 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
987 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
988 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
989 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
990 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
993 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
994 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
995 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
996 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1000 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1001 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1002 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1003 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1007 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1008 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1013 for(int j=0;j<fastaFileNames.size();j++){
1014 for(int k=0;k<fastaFileNames[j].size();k++){
1015 if (fastaFileNames[j][k] != "") {
1016 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1017 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1019 if(qFileName != ""){
1020 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1021 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1025 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1026 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1033 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1036 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1037 m->openInputFile(tempFile, in);
1041 in >> tempNum; m->gobble(in);
1045 in >> group >> tempNum; m->gobble(in);
1047 map<string, int>::iterator it = groupCounts.find(group);
1048 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
1049 else { groupCounts[it->first] += tempNum; }
1052 in.close(); m->mothurRemove(tempFile);
1059 catch(exception& e) {
1060 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1065 /**************************************************************************************************/
1067 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1070 vector<unsigned long long> fastaFilePos;
1071 vector<unsigned long long> qfileFilePos;
1073 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1074 //set file positions for fasta file
1075 fastaFilePos = m->divideFile(filename, processors);
1077 if (qfilename == "") { return processors; }
1079 //get name of first sequence in each chunk
1080 map<string, int> firstSeqNames;
1081 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1083 m->openInputFile(filename, in);
1084 in.seekg(fastaFilePos[i]);
1087 firstSeqNames[temp.getName()] = i;
1092 //seach for filePos of each first name in the qfile and save in qfileFilePos
1094 m->openInputFile(qfilename, inQual);
1097 while(!inQual.eof()){
1098 input = m->getline(inQual);
1100 if (input.length() != 0) {
1101 if(input[0] == '>'){ //this is a sequence name line
1102 istringstream nameStream(input);
1104 string sname = ""; nameStream >> sname;
1105 sname = sname.substr(1);
1107 map<string, int>::iterator it = firstSeqNames.find(sname);
1109 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1110 unsigned long long pos = inQual.tellg();
1111 qfileFilePos.push_back(pos - input.length() - 1);
1112 firstSeqNames.erase(it);
1117 if (firstSeqNames.size() == 0) { break; }
1122 if (firstSeqNames.size() != 0) {
1123 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1124 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1130 //get last file position of qfile
1132 unsigned long long size;
1134 //get num bytes in file
1135 pFile = fopen (qfilename.c_str(),"rb");
1136 if (pFile==NULL) perror ("Error opening file");
1138 fseek (pFile, 0, SEEK_END);
1143 qfileFilePos.push_back(size);
1145 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1146 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1147 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1149 if(qfilename == "") { qLines = lines; } //files with duds
1155 if (processors == 1) { //save time
1156 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1157 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1158 lines.push_back(linePair(0, 1000));
1159 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1161 int numFastaSeqs = 0;
1162 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1164 if (qfilename != "") {
1165 int numQualSeqs = 0;
1166 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1168 if (numFastaSeqs != numQualSeqs) {
1169 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1173 //figure out how many sequences you have to process
1174 int numSeqsPerProcessor = numFastaSeqs / processors;
1175 for (int i = 0; i < processors; i++) {
1176 int startIndex = i * numSeqsPerProcessor;
1177 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1178 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1179 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1180 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1183 if(qfilename == "") { qLines = lines; } //files with duds
1189 catch(exception& e) {
1190 m->errorOut(e, "TrimSeqsCommand", "setLines");
1195 //***************************************************************************************************************
1197 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1200 m->openInputFile(oligoFile, inOligos);
1204 string type, oligo, group;
1206 int indexPrimer = 0;
1207 int indexBarcode = 0;
1209 while(!inOligos.eof()){
1214 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1215 m->gobble(inOligos);
1218 m->gobble(inOligos);
1219 //make type case insensitive
1220 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1224 for(int i=0;i<oligo.length();i++){
1225 oligo[i] = toupper(oligo[i]);
1226 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1229 if(type == "FORWARD"){
1232 // get rest of line in case there is a primer name
1233 while (!inOligos.eof()) {
1234 char c = inOligos.get();
1235 if (c == 10 || c == 13){ break; }
1236 else if (c == 32 || c == 9){;} //space or tab
1237 else { group += c; }
1240 //check for repeat barcodes
1241 map<string, int>::iterator itPrime = primers.find(oligo);
1242 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1244 primers[oligo]=indexPrimer; indexPrimer++;
1245 primerNameVector.push_back(group);
1247 else if(type == "REVERSE"){
1248 Sequence oligoRC("reverse", oligo);
1249 oligoRC.reverseComplement();
1250 revPrimer.push_back(oligoRC.getUnaligned());
1252 else if(type == "BARCODE"){
1255 //check for repeat barcodes
1256 map<string, int>::iterator itBar = barcodes.find(oligo);
1257 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1259 barcodes[oligo]=indexBarcode; indexBarcode++;
1260 barcodeNameVector.push_back(group);
1261 }else if(type == "LINKER"){
1262 linker.push_back(oligo);
1263 }else if(type == "SPACER"){
1264 spacer.push_back(oligo);
1266 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1268 m->gobble(inOligos);
1272 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1274 //add in potential combos
1275 if(barcodeNameVector.size() == 0){
1277 barcodeNameVector.push_back("");
1280 if(primerNameVector.size() == 0){
1282 primerNameVector.push_back("");
1285 fastaFileNames.resize(barcodeNameVector.size());
1286 for(int i=0;i<fastaFileNames.size();i++){
1287 fastaFileNames[i].assign(primerNameVector.size(), "");
1289 if(qFileName != "") { qualFileNames = fastaFileNames; }
1290 if(nameFile != "") { nameFileNames = fastaFileNames; }
1293 set<string> uniqueNames; //used to cleanup outputFileNames
1294 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1295 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1297 string primerName = primerNameVector[itPrimer->second];
1298 string barcodeName = barcodeNameVector[itBar->second];
1300 string comboGroupName = "";
1301 string fastaFileName = "";
1302 string qualFileName = "";
1303 string nameFileName = "";
1305 if(primerName == ""){
1306 comboGroupName = barcodeNameVector[itBar->second];
1309 if(barcodeName == ""){
1310 comboGroupName = primerNameVector[itPrimer->second];
1313 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1319 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1320 if (uniqueNames.count(fastaFileName) == 0) {
1321 outputNames.push_back(fastaFileName);
1322 outputTypes["fasta"].push_back(fastaFileName);
1323 uniqueNames.insert(fastaFileName);
1326 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1327 m->openOutputFile(fastaFileName, temp); temp.close();
1329 if(qFileName != ""){
1330 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1331 if (uniqueNames.count(qualFileName) == 0) {
1332 outputNames.push_back(qualFileName);
1333 outputTypes["qfile"].push_back(qualFileName);
1336 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1337 m->openOutputFile(qualFileName, temp); temp.close();
1341 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1342 if (uniqueNames.count(nameFileName) == 0) {
1343 outputNames.push_back(nameFileName);
1344 outputTypes["name"].push_back(nameFileName);
1347 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1348 m->openOutputFile(nameFileName, temp); temp.close();
1354 numFPrimers = primers.size();
1355 numRPrimers = revPrimer.size();
1356 numLinkers = linker.size();
1357 numSpacers = spacer.size();
1359 bool allBlank = true;
1360 for (int i = 0; i < barcodeNameVector.size(); i++) {
1361 if (barcodeNameVector[i] != "") {
1366 for (int i = 0; i < primerNameVector.size(); i++) {
1367 if (primerNameVector[i] != "") {
1374 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1382 catch(exception& e) {
1383 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1387 //***************************************************************************************************************
1389 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1392 if(qscores.getName() != ""){
1393 qscores.trimQScores(-1, keepFirst);
1395 sequence.trim(keepFirst);
1398 catch(exception& e) {
1399 m->errorOut(e, "keepFirstTrim", "countDiffs");
1405 //***************************************************************************************************************
1407 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1411 int length = sequence.getNumBases() - removeLast;
1414 if(qscores.getName() != ""){
1415 qscores.trimQScores(-1, length);
1417 sequence.trim(length);
1426 catch(exception& e) {
1427 m->errorOut(e, "removeLastTrim", "countDiffs");
1433 //***************************************************************************************************************
1435 bool TrimSeqsCommand::cullLength(Sequence& seq){
1438 int length = seq.getNumBases();
1439 bool success = 0; //guilty until proven innocent
1441 if(length >= minLength && maxLength == 0) { success = 1; }
1442 else if(length >= minLength && length <= maxLength) { success = 1; }
1443 else { success = 0; }
1448 catch(exception& e) {
1449 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1455 //***************************************************************************************************************
1457 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1459 int longHomoP = seq.getLongHomoPolymer();
1460 bool success = 0; //guilty until proven innocent
1462 if(longHomoP <= maxHomoP){ success = 1; }
1463 else { success = 0; }
1467 catch(exception& e) {
1468 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1474 //***************************************************************************************************************
1476 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1478 int numNs = seq.getAmbigBases();
1479 bool success = 0; //guilty until proven innocent
1481 if(numNs <= maxAmbig) { success = 1; }
1482 else { success = 0; }
1486 catch(exception& e) {
1487 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1492 //***************************************************************************************************************