5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "TrimSeqsCommand", "setParameters");
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62 helpString += "The fasta parameter is required.\n";
63 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75 helpString += "The qfile parameter allows you to provide a quality file.\n";
76 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87 helpString += "The trim.seqs command should be in the following format: \n";
88 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
89 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
90 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
102 //**********************************************************************************************************************
104 TrimSeqsCommand::TrimSeqsCommand(){
106 abort = true; calledHelp = true;
108 vector<string> tempOutNames;
109 outputTypes["fasta"] = tempOutNames;
110 outputTypes["qfile"] = tempOutNames;
111 outputTypes["group"] = tempOutNames;
112 outputTypes["name"] = tempOutNames;
114 catch(exception& e) {
115 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
119 //***************************************************************************************************************
121 TrimSeqsCommand::TrimSeqsCommand(string option) {
124 abort = false; calledHelp = false;
127 //allow user to run help
128 if(option == "help") { help(); abort = true; calledHelp = true; }
129 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
132 vector<string> myArray = setParameters();
134 OptionParser parser(option);
135 map<string,string> parameters = parser.getParameters();
137 ValidParameters validParameter;
138 map<string,string>::iterator it;
140 //check to make sure all parameters are valid for command
141 for (it = parameters.begin(); it != parameters.end(); it++) {
142 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
145 //initialize outputTypes
146 vector<string> tempOutNames;
147 outputTypes["fasta"] = tempOutNames;
148 outputTypes["qfile"] = tempOutNames;
149 outputTypes["group"] = tempOutNames;
150 outputTypes["name"] = tempOutNames;
152 //if the user changes the input directory command factory will send this info to us in the output parameter
153 string inputDir = validParameter.validFile(parameters, "inputdir", false);
154 if (inputDir == "not found"){ inputDir = ""; }
157 it = parameters.find("fasta");
158 //user has given a template file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["fasta"] = inputDir + it->second; }
165 it = parameters.find("oligos");
166 //user has given a template file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["oligos"] = inputDir + it->second; }
173 it = parameters.find("qfile");
174 //user has given a template file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["qfile"] = inputDir + it->second; }
181 it = parameters.find("name");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["name"] = inputDir + it->second; }
192 //check for required parameters
193 fastaFile = validParameter.validFile(parameters, "fasta", true);
194 if (fastaFile == "not found") {
195 fastaFile = m->getFastaFile();
196 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198 }else if (fastaFile == "not open") { abort = true; }
199 else { m->setFastaFile(fastaFile); }
201 //if the user changes the output directory command factory will send this info to us in the output parameter
202 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
204 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
211 temp = validParameter.validFile(parameters, "flip", false);
212 if (temp == "not found") { flip = 0; }
213 else { flip = m->isTrue(temp); }
215 temp = validParameter.validFile(parameters, "oligos", true);
216 if (temp == "not found"){ oligoFile = ""; }
217 else if(temp == "not open"){ abort = true; }
218 else { oligoFile = temp; m->setOligosFile(oligoFile); }
221 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
222 m->mothurConvert(temp, maxAmbig);
224 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
225 m->mothurConvert(temp, maxHomoP);
227 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
228 m->mothurConvert(temp, minLength);
230 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
231 m->mothurConvert(temp, maxLength);
233 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
234 m->mothurConvert(temp, bdiffs);
236 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
237 m->mothurConvert(temp, pdiffs);
239 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
240 m->mothurConvert(temp, ldiffs);
242 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
243 m->mothurConvert(temp, sdiffs);
245 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
246 m->mothurConvert(temp, tdiffs);
248 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
250 temp = validParameter.validFile(parameters, "qfile", true);
251 if (temp == "not found") { qFileName = ""; }
252 else if(temp == "not open") { abort = true; }
253 else { qFileName = temp; m->setQualFile(qFileName); }
255 temp = validParameter.validFile(parameters, "name", true);
256 if (temp == "not found") { nameFile = ""; }
257 else if(temp == "not open") { nameFile = ""; abort = true; }
258 else { nameFile = temp; m->setNameFile(nameFile); }
260 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
261 m->mothurConvert(temp, qThreshold);
263 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
264 qtrim = m->isTrue(temp);
266 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
267 convert(temp, qRollAverage);
269 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270 convert(temp, qWindowAverage);
272 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
273 convert(temp, qWindowSize);
275 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
276 convert(temp, qWindowStep);
278 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
279 convert(temp, qAverage);
281 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
282 convert(temp, keepFirst);
284 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
285 convert(temp, removeLast);
287 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
288 allFiles = m->isTrue(temp);
290 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
291 keepforward = m->isTrue(temp);
293 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
294 m->setProcessors(temp);
295 m->mothurConvert(temp, processors);
298 if(allFiles && (oligoFile == "")){
299 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
301 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
306 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
307 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
311 if (nameFile == "") {
312 vector<string> files; files.push_back(fastaFile);
313 parser.getNameFile(files);
318 catch(exception& e) {
319 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
323 //***************************************************************************************************************
325 int TrimSeqsCommand::execute(){
328 if (abort == true) { if (calledHelp) { return 0; } return 2; }
330 numFPrimers = 0; //this needs to be initialized
335 vector<vector<string> > fastaFileNames;
336 vector<vector<string> > qualFileNames;
337 vector<vector<string> > nameFileNames;
339 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
340 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
342 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
343 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
345 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
346 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
348 if (qFileName != "") {
349 outputNames.push_back(trimQualFile);
350 outputNames.push_back(scrapQualFile);
351 outputTypes["qfile"].push_back(trimQualFile);
352 outputTypes["qfile"].push_back(scrapQualFile);
355 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
356 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
358 if (nameFile != "") {
359 m->readNames(nameFile, nameMap);
360 outputNames.push_back(trimNameFile);
361 outputNames.push_back(scrapNameFile);
362 outputTypes["name"].push_back(trimNameFile);
363 outputTypes["name"].push_back(scrapNameFile);
366 if (m->control_pressed) { return 0; }
368 string outputGroupFileName;
370 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
372 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
373 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
377 //fills lines and qlines
378 setLines(fastaFile, qFileName);
381 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
383 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
387 if (m->control_pressed) { return 0; }
390 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391 map<string, string>::iterator it;
392 set<string> namesToRemove;
393 for(int i=0;i<fastaFileNames.size();i++){
394 for(int j=0;j<fastaFileNames[0].size();j++){
395 if (fastaFileNames[i][j] != "") {
396 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
397 if(m->isBlank(fastaFileNames[i][j])){
398 m->mothurRemove(fastaFileNames[i][j]);
399 namesToRemove.insert(fastaFileNames[i][j]);
402 m->mothurRemove(qualFileNames[i][j]);
403 namesToRemove.insert(qualFileNames[i][j]);
407 m->mothurRemove(nameFileNames[i][j]);
408 namesToRemove.insert(nameFileNames[i][j]);
411 it = uniqueFastaNames.find(fastaFileNames[i][j]);
412 if (it == uniqueFastaNames.end()) {
413 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
421 //remove names for outputFileNames, just cleans up the output
422 vector<string> outputNames2;
423 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
424 outputNames = outputNames2;
426 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
428 m->openInputFile(it->first, in);
431 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
432 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
433 m->openOutputFile(thisGroupName, out);
436 if (m->control_pressed) { break; }
438 Sequence currSeq(in); m->gobble(in);
439 out << currSeq.getName() << '\t' << it->second << endl;
446 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
448 //output group counts
449 m->mothurOutEndLine();
451 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
452 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
453 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
455 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
457 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
459 //set fasta file as new current fastafile
461 itTypes = outputTypes.find("fasta");
462 if (itTypes != outputTypes.end()) {
463 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
466 itTypes = outputTypes.find("name");
467 if (itTypes != outputTypes.end()) {
468 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
471 itTypes = outputTypes.find("qfile");
472 if (itTypes != outputTypes.end()) {
473 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
476 itTypes = outputTypes.find("group");
477 if (itTypes != outputTypes.end()) {
478 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
481 m->mothurOutEndLine();
482 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
483 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
484 m->mothurOutEndLine();
489 catch(exception& e) {
490 m->errorOut(e, "TrimSeqsCommand", "execute");
495 /**************************************************************************************/
497 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
501 ofstream trimFASTAFile;
502 m->openOutputFile(trimFileName, trimFASTAFile);
504 ofstream scrapFASTAFile;
505 m->openOutputFile(scrapFileName, scrapFASTAFile);
507 ofstream trimQualFile;
508 ofstream scrapQualFile;
510 m->openOutputFile(trimQFileName, trimQualFile);
511 m->openOutputFile(scrapQFileName, scrapQualFile);
514 ofstream trimNameFile;
515 ofstream scrapNameFile;
517 m->openOutputFile(trimNFileName, trimNameFile);
518 m->openOutputFile(scrapNFileName, scrapNameFile);
522 ofstream outGroupsFile;
523 if (createGroup){ m->openOutputFile(groupFileName, outGroupsFile); }
525 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
526 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
527 if (fastaFileNames[i][j] != "") {
529 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
531 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
535 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
543 m->openInputFile(filename, inFASTA);
544 inFASTA.seekg(line.start);
547 if(qFileName != "") {
548 m->openInputFile(qFileName, qFile);
549 qFile.seekg(qline.start);
554 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
558 if (m->control_pressed) {
559 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
560 if (createGroup) { outGroupsFile.close(); }
565 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
571 string trashCode = "";
572 int currentSeqsDiffs = 0;
574 Sequence currSeq(inFASTA); m->gobble(inFASTA);
575 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
576 QualityScores currQual;
578 currQual = QualityScores(qFile); m->gobble(qFile);
581 string origSeq = currSeq.getUnaligned();
584 int barcodeIndex = 0;
588 success = trimOligos.stripLinker(currSeq, currQual);
589 if(success > ldiffs) { trashCode += 'k'; }
590 else{ currentSeqsDiffs += success; }
594 if(barcodes.size() != 0){
595 success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
596 if(success > bdiffs) { trashCode += 'b'; }
597 else{ currentSeqsDiffs += success; }
601 success = trimOligos.stripSpacer(currSeq, currQual);
602 if(success > sdiffs) { trashCode += 's'; }
603 else{ currentSeqsDiffs += success; }
607 if(numFPrimers != 0){
608 success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
609 if(success > pdiffs) { trashCode += 'f'; }
610 else{ currentSeqsDiffs += success; }
613 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
615 if(numRPrimers != 0){
616 success = trimOligos.stripReverse(currSeq, currQual);
617 if(!success) { trashCode += 'r'; }
621 success = keepFirstTrim(currSeq, currQual);
625 success = removeLastTrim(currSeq, currQual);
626 if(!success) { trashCode += 'l'; }
631 int origLength = currSeq.getNumBases();
633 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
634 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
635 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
636 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
637 else { success = 1; }
639 //you don't want to trim, if it fails above then scrap it
640 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
642 if(!success) { trashCode += 'q'; }
645 if(minLength > 0 || maxLength > 0){
646 success = cullLength(currSeq);
647 if(!success) { trashCode += 'l'; }
650 success = cullHomoP(currSeq);
651 if(!success) { trashCode += 'h'; }
654 success = cullAmbigs(currSeq);
655 if(!success) { trashCode += 'n'; }
658 if(flip){ // should go last
659 currSeq.reverseComplement();
661 currQual.flipQScores();
665 if(trashCode.length() == 0){
666 currSeq.setAligned(currSeq.getUnaligned());
667 currSeq.printSequence(trimFASTAFile);
670 currQual.printQScores(trimQualFile);
674 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
675 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
676 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
680 if(barcodes.size() != 0){
681 string thisGroup = barcodeNameVector[barcodeIndex];
682 if (primers.size() != 0) {
683 if (primerNameVector[primerIndex] != "") {
684 if(thisGroup != "") {
685 thisGroup += "." + primerNameVector[primerIndex];
687 thisGroup = primerNameVector[primerIndex];
692 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
694 if (nameFile != "") {
695 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
696 if (itName != nameMap.end()) {
697 vector<string> thisSeqsNames;
698 m->splitAtChar(itName->second, thisSeqsNames, ',');
699 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
700 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
702 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
705 map<string, int>::iterator it = groupCounts.find(thisGroup);
706 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
707 else { groupCounts[it->first]++; }
714 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
715 currSeq.printSequence(output);
719 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
720 currQual.printQScores(output);
725 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
726 if (itName != nameMap.end()) {
727 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
728 output << itName->first << '\t' << itName->second << endl;
730 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
735 if(nameFile != ""){ //needs to be before the currSeq name is changed
736 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
737 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
738 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
740 currSeq.setName(currSeq.getName() + '|' + trashCode);
741 currSeq.setUnaligned(origSeq);
742 currSeq.setAligned(origSeq);
743 currSeq.printSequence(scrapFASTAFile);
745 currQual.printQScores(scrapQualFile);
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
752 unsigned long long pos = inFASTA.tellg();
753 if ((pos == -1) || (pos >= line.end)) { break; }
756 if (inFASTA.eof()) { break; }
760 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
764 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
768 trimFASTAFile.close();
769 scrapFASTAFile.close();
770 if (createGroup) { outGroupsFile.close(); }
771 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
772 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
776 catch(exception& e) {
777 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
782 /**************************************************************************************************/
784 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
791 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
792 //loop through and create all the processes you want
793 while (process != processors) {
797 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
801 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
802 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
803 vector<vector<string> > tempNameFileNames = nameFileNames;
808 for(int i=0;i<tempFASTAFileNames.size();i++){
809 for(int j=0;j<tempFASTAFileNames[i].size();j++){
810 if (tempFASTAFileNames[i][j] != "") {
811 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
812 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
815 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
816 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
819 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
820 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
827 driverCreateTrim(filename,
829 (trimFASTAFileName + toString(getpid()) + ".temp"),
830 (scrapFASTAFileName + toString(getpid()) + ".temp"),
831 (trimQualFileName + toString(getpid()) + ".temp"),
832 (scrapQualFileName + toString(getpid()) + ".temp"),
833 (trimNameFileName + toString(getpid()) + ".temp"),
834 (scrapNameFileName + toString(getpid()) + ".temp"),
835 (groupFile + toString(getpid()) + ".temp"),
837 tempPrimerQualFileNames,
842 //pass groupCounts to parent
845 string tempFile = filename + toString(getpid()) + ".num.temp";
846 m->openOutputFile(tempFile, out);
848 out << groupCounts.size() << endl;
850 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
851 out << it->first << '\t' << it->second << endl;
857 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
858 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
865 m->openOutputFile(trimFASTAFileName, temp); temp.close();
866 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
868 m->openOutputFile(trimQualFileName, temp); temp.close();
869 m->openOutputFile(scrapQualFileName, temp); temp.close();
871 if (nameFile != "") {
872 m->openOutputFile(trimNameFileName, temp); temp.close();
873 m->openOutputFile(scrapNameFileName, temp); temp.close();
876 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
878 //force parent to wait until all the processes are done
879 for (int i=0;i<processIDS.size();i++) {
880 int temp = processIDS[i];
884 //////////////////////////////////////////////////////////////////////////////////////////////////////
885 //Windows version shared memory, so be careful when passing variables through the trimData struct.
886 //Above fork() will clone, so memory is separate, but that's not the case with windows,
887 //////////////////////////////////////////////////////////////////////////////////////////////////////
889 vector<trimData*> pDataArray;
890 DWORD dwThreadIdArray[processors-1];
891 HANDLE hThreadArray[processors-1];
893 //Create processor worker threads.
894 for( int i=0; i<processors-1; i++){
896 string extension = "";
897 if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
898 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
899 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
900 vector<vector<string> > tempNameFileNames = nameFileNames;
905 for(int i=0;i<tempFASTAFileNames.size();i++){
906 for(int j=0;j<tempFASTAFileNames[i].size();j++){
907 if (tempFASTAFileNames[i][j] != "") {
908 tempFASTAFileNames[i][j] += extension;
909 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
912 tempPrimerQualFileNames[i][j] += extension;
913 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
916 tempNameFileNames[i][j] += extension;
917 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
925 trimData* tempTrim = new trimData(filename,
927 (trimFASTAFileName+extension),
928 (scrapFASTAFileName+extension),
929 (trimQualFileName+extension),
930 (scrapQualFileName+extension),
931 (trimNameFileName+extension),
932 (scrapNameFileName+extension),
933 (groupFile+extension),
935 tempPrimerQualFileNames,
937 lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
938 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer,
939 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
940 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
941 minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
942 pDataArray.push_back(tempTrim);
944 hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
949 m->openOutputFile(trimFASTAFileName, temp); temp.close();
950 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
952 m->openOutputFile(trimQualFileName, temp); temp.close();
953 m->openOutputFile(scrapQualFileName, temp); temp.close();
955 if (nameFile != "") {
956 m->openOutputFile(trimNameFileName, temp); temp.close();
957 m->openOutputFile(scrapNameFileName, temp); temp.close();
960 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
961 processIDS.push_back(processors-1);
964 //Wait until all threads have terminated.
965 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
967 //Close all thread handles and free memory allocations.
968 for(int i=0; i < pDataArray.size(); i++){
969 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
970 map<string, int>::iterator it2 = groupCounts.find(it->first);
971 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
972 else { groupCounts[it->first] += it->second; }
974 CloseHandle(hThreadArray[i]);
975 delete pDataArray[i];
982 for(int i=0;i<processIDS.size();i++){
984 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
986 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
987 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
988 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
989 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
992 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
993 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
994 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
995 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
999 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1000 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1001 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1002 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1006 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1007 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1012 for(int j=0;j<fastaFileNames.size();j++){
1013 for(int k=0;k<fastaFileNames[j].size();k++){
1014 if (fastaFileNames[j][k] != "") {
1015 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1016 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1018 if(qFileName != ""){
1019 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1020 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1024 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1025 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1032 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1035 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1036 m->openInputFile(tempFile, in);
1040 in >> tempNum; m->gobble(in);
1044 in >> group >> tempNum; m->gobble(in);
1046 map<string, int>::iterator it = groupCounts.find(group);
1047 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
1048 else { groupCounts[it->first] += tempNum; }
1051 in.close(); m->mothurRemove(tempFile);
1058 catch(exception& e) {
1059 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1064 /**************************************************************************************************/
1066 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1069 vector<unsigned long long> fastaFilePos;
1070 vector<unsigned long long> qfileFilePos;
1072 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1073 //set file positions for fasta file
1074 fastaFilePos = m->divideFile(filename, processors);
1076 //get name of first sequence in each chunk
1077 map<string, int> firstSeqNames;
1078 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1080 m->openInputFile(filename, in);
1081 in.seekg(fastaFilePos[i]);
1084 firstSeqNames[temp.getName()] = i;
1089 if(qfilename != "") {
1090 //seach for filePos of each first name in the qfile and save in qfileFilePos
1092 m->openInputFile(qfilename, inQual);
1095 while(!inQual.eof()){
1096 input = m->getline(inQual);
1098 if (input.length() != 0) {
1099 if(input[0] == '>'){ //this is a sequence name line
1100 istringstream nameStream(input);
1102 string sname = ""; nameStream >> sname;
1103 sname = sname.substr(1);
1105 map<string, int>::iterator it = firstSeqNames.find(sname);
1107 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1108 unsigned long long pos = inQual.tellg();
1109 qfileFilePos.push_back(pos - input.length() - 1);
1110 firstSeqNames.erase(it);
1115 if (firstSeqNames.size() == 0) { break; }
1120 if (firstSeqNames.size() != 0) {
1121 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1122 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1128 //get last file position of qfile
1130 unsigned long long size;
1132 //get num bytes in file
1133 pFile = fopen (qfilename.c_str(),"rb");
1134 if (pFile==NULL) perror ("Error opening file");
1136 fseek (pFile, 0, SEEK_END);
1141 qfileFilePos.push_back(size);
1144 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1145 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1146 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1148 if(qfilename == "") { qLines = lines; } //files with duds
1154 if (processors == 1) { //save time
1155 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1156 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1157 lines.push_back(linePair(0, 1000));
1158 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1160 int numFastaSeqs = 0;
1161 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1162 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1164 if (qfilename != "") {
1165 int numQualSeqs = 0;
1166 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1168 if (numFastaSeqs != numQualSeqs) {
1169 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1173 //figure out how many sequences you have to process
1174 int numSeqsPerProcessor = numFastaSeqs / processors;
1175 for (int i = 0; i < processors; i++) {
1176 int startIndex = i * numSeqsPerProcessor;
1177 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1178 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1179 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1180 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1183 if(qfilename == "") { qLines = lines; } //files with duds
1189 catch(exception& e) {
1190 m->errorOut(e, "TrimSeqsCommand", "setLines");
1195 //***************************************************************************************************************
1197 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1200 m->openInputFile(oligoFile, inOligos);
1204 string type, oligo, group;
1206 int indexPrimer = 0;
1207 int indexBarcode = 0;
1209 while(!inOligos.eof()){
1214 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1215 m->gobble(inOligos);
1218 m->gobble(inOligos);
1219 //make type case insensitive
1220 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1224 for(int i=0;i<oligo.length();i++){
1225 oligo[i] = toupper(oligo[i]);
1226 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1229 if(type == "FORWARD"){
1232 // get rest of line in case there is a primer name
1233 while (!inOligos.eof()) {
1234 char c = inOligos.get();
1235 if (c == 10 || c == 13){ break; }
1236 else if (c == 32 || c == 9){;} //space or tab
1237 else { group += c; }
1240 //check for repeat barcodes
1241 map<string, int>::iterator itPrime = primers.find(oligo);
1242 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1244 primers[oligo]=indexPrimer; indexPrimer++;
1245 primerNameVector.push_back(group);
1247 else if(type == "REVERSE"){
1248 //Sequence oligoRC("reverse", oligo);
1249 //oligoRC.reverseComplement();
1250 string oligoRC = reverseOligo(oligo);
1251 revPrimer.push_back(oligoRC);
1253 else if(type == "BARCODE"){
1256 //check for repeat barcodes
1257 map<string, int>::iterator itBar = barcodes.find(oligo);
1258 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1260 barcodes[oligo]=indexBarcode; indexBarcode++;
1261 barcodeNameVector.push_back(group);
1262 }else if(type == "LINKER"){
1263 linker.push_back(oligo);
1264 }else if(type == "SPACER"){
1265 spacer.push_back(oligo);
1267 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1269 m->gobble(inOligos);
1273 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1275 //add in potential combos
1276 if(barcodeNameVector.size() == 0){
1278 barcodeNameVector.push_back("");
1281 if(primerNameVector.size() == 0){
1283 primerNameVector.push_back("");
1286 fastaFileNames.resize(barcodeNameVector.size());
1287 for(int i=0;i<fastaFileNames.size();i++){
1288 fastaFileNames[i].assign(primerNameVector.size(), "");
1290 if(qFileName != "") { qualFileNames = fastaFileNames; }
1291 if(nameFile != "") { nameFileNames = fastaFileNames; }
1294 set<string> uniqueNames; //used to cleanup outputFileNames
1295 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1296 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1298 string primerName = primerNameVector[itPrimer->second];
1299 string barcodeName = barcodeNameVector[itBar->second];
1301 string comboGroupName = "";
1302 string fastaFileName = "";
1303 string qualFileName = "";
1304 string nameFileName = "";
1306 if(primerName == ""){
1307 comboGroupName = barcodeNameVector[itBar->second];
1310 if(barcodeName == ""){
1311 comboGroupName = primerNameVector[itPrimer->second];
1314 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1320 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1321 if (uniqueNames.count(fastaFileName) == 0) {
1322 outputNames.push_back(fastaFileName);
1323 outputTypes["fasta"].push_back(fastaFileName);
1324 uniqueNames.insert(fastaFileName);
1327 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1328 m->openOutputFile(fastaFileName, temp); temp.close();
1330 if(qFileName != ""){
1331 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1332 if (uniqueNames.count(qualFileName) == 0) {
1333 outputNames.push_back(qualFileName);
1334 outputTypes["qfile"].push_back(qualFileName);
1337 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1338 m->openOutputFile(qualFileName, temp); temp.close();
1342 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1343 if (uniqueNames.count(nameFileName) == 0) {
1344 outputNames.push_back(nameFileName);
1345 outputTypes["name"].push_back(nameFileName);
1348 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1349 m->openOutputFile(nameFileName, temp); temp.close();
1355 numFPrimers = primers.size();
1356 numRPrimers = revPrimer.size();
1357 numLinkers = linker.size();
1358 numSpacers = spacer.size();
1360 bool allBlank = true;
1361 for (int i = 0; i < barcodeNameVector.size(); i++) {
1362 if (barcodeNameVector[i] != "") {
1367 for (int i = 0; i < primerNameVector.size(); i++) {
1368 if (primerNameVector[i] != "") {
1375 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1383 catch(exception& e) {
1384 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1388 //***************************************************************************************************************
1390 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1393 if(qscores.getName() != ""){
1394 qscores.trimQScores(-1, keepFirst);
1396 sequence.trim(keepFirst);
1399 catch(exception& e) {
1400 m->errorOut(e, "keepFirstTrim", "countDiffs");
1406 //***************************************************************************************************************
1408 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1412 int length = sequence.getNumBases() - removeLast;
1415 if(qscores.getName() != ""){
1416 qscores.trimQScores(-1, length);
1418 sequence.trim(length);
1427 catch(exception& e) {
1428 m->errorOut(e, "removeLastTrim", "countDiffs");
1434 //***************************************************************************************************************
1436 bool TrimSeqsCommand::cullLength(Sequence& seq){
1439 int length = seq.getNumBases();
1440 bool success = 0; //guilty until proven innocent
1442 if(length >= minLength && maxLength == 0) { success = 1; }
1443 else if(length >= minLength && length <= maxLength) { success = 1; }
1444 else { success = 0; }
1449 catch(exception& e) {
1450 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1456 //***************************************************************************************************************
1458 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1460 int longHomoP = seq.getLongHomoPolymer();
1461 bool success = 0; //guilty until proven innocent
1463 if(longHomoP <= maxHomoP){ success = 1; }
1464 else { success = 0; }
1468 catch(exception& e) {
1469 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1474 //********************************************************************/
1475 string TrimSeqsCommand::reverseOligo(string oligo){
1477 string reverse = "";
1479 for(int i=oligo.length()-1;i>=0;i--){
1481 if(oligo[i] == 'A') { reverse += 'T'; }
1482 else if(oligo[i] == 'T'){ reverse += 'A'; }
1483 else if(oligo[i] == 'U'){ reverse += 'A'; }
1485 else if(oligo[i] == 'G'){ reverse += 'C'; }
1486 else if(oligo[i] == 'C'){ reverse += 'G'; }
1488 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1489 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1491 else if(oligo[i] == 'M'){ reverse += 'K'; }
1492 else if(oligo[i] == 'K'){ reverse += 'M'; }
1494 else if(oligo[i] == 'W'){ reverse += 'W'; }
1495 else if(oligo[i] == 'S'){ reverse += 'S'; }
1497 else if(oligo[i] == 'B'){ reverse += 'V'; }
1498 else if(oligo[i] == 'V'){ reverse += 'B'; }
1500 else if(oligo[i] == 'D'){ reverse += 'H'; }
1501 else if(oligo[i] == 'H'){ reverse += 'D'; }
1503 else { reverse += 'N'; }
1509 catch(exception& e) {
1510 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1515 //***************************************************************************************************************
1517 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1519 int numNs = seq.getAmbigBases();
1520 bool success = 0; //guilty until proven innocent
1522 if(numNs <= maxAmbig) { success = 1; }
1523 else { success = 0; }
1527 catch(exception& e) {
1528 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1533 //***************************************************************************************************************