5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "TrimSeqsCommand", "setParameters");
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62 helpString += "The fasta parameter is required.\n";
63 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75 helpString += "The qfile parameter allows you to provide a quality file.\n";
76 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87 helpString += "The trim.seqs command should be in the following format: \n";
88 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
89 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
90 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
102 //**********************************************************************************************************************
104 TrimSeqsCommand::TrimSeqsCommand(){
106 abort = true; calledHelp = true;
108 vector<string> tempOutNames;
109 outputTypes["fasta"] = tempOutNames;
110 outputTypes["qfile"] = tempOutNames;
111 outputTypes["group"] = tempOutNames;
112 outputTypes["name"] = tempOutNames;
114 catch(exception& e) {
115 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
119 //***************************************************************************************************************
121 TrimSeqsCommand::TrimSeqsCommand(string option) {
124 abort = false; calledHelp = false;
127 //allow user to run help
128 if(option == "help") { help(); abort = true; calledHelp = true; }
129 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
132 vector<string> myArray = setParameters();
134 OptionParser parser(option);
135 map<string,string> parameters = parser.getParameters();
137 ValidParameters validParameter;
138 map<string,string>::iterator it;
140 //check to make sure all parameters are valid for command
141 for (it = parameters.begin(); it != parameters.end(); it++) {
142 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
145 //initialize outputTypes
146 vector<string> tempOutNames;
147 outputTypes["fasta"] = tempOutNames;
148 outputTypes["qfile"] = tempOutNames;
149 outputTypes["group"] = tempOutNames;
150 outputTypes["name"] = tempOutNames;
152 //if the user changes the input directory command factory will send this info to us in the output parameter
153 string inputDir = validParameter.validFile(parameters, "inputdir", false);
154 if (inputDir == "not found"){ inputDir = ""; }
157 it = parameters.find("fasta");
158 //user has given a template file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["fasta"] = inputDir + it->second; }
165 it = parameters.find("oligos");
166 //user has given a template file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["oligos"] = inputDir + it->second; }
173 it = parameters.find("qfile");
174 //user has given a template file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["qfile"] = inputDir + it->second; }
181 it = parameters.find("name");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["name"] = inputDir + it->second; }
192 //check for required parameters
193 fastaFile = validParameter.validFile(parameters, "fasta", true);
194 if (fastaFile == "not found") {
195 fastaFile = m->getFastaFile();
196 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198 }else if (fastaFile == "not open") { abort = true; }
199 else { m->setFastaFile(fastaFile); }
201 //if the user changes the output directory command factory will send this info to us in the output parameter
202 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
204 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
211 temp = validParameter.validFile(parameters, "flip", false);
212 if (temp == "not found") { flip = 0; }
213 else { flip = m->isTrue(temp); }
215 temp = validParameter.validFile(parameters, "oligos", true);
216 if (temp == "not found"){ oligoFile = ""; }
217 else if(temp == "not open"){ abort = true; }
218 else { oligoFile = temp; m->setOligosFile(oligoFile); }
221 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
222 m->mothurConvert(temp, maxAmbig);
224 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
225 m->mothurConvert(temp, maxHomoP);
227 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
228 m->mothurConvert(temp, minLength);
230 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
231 m->mothurConvert(temp, maxLength);
233 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
234 m->mothurConvert(temp, bdiffs);
236 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
237 m->mothurConvert(temp, pdiffs);
239 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
240 m->mothurConvert(temp, ldiffs);
242 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
243 m->mothurConvert(temp, sdiffs);
245 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
246 m->mothurConvert(temp, tdiffs);
248 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
250 temp = validParameter.validFile(parameters, "qfile", true);
251 if (temp == "not found") { qFileName = ""; }
252 else if(temp == "not open") { abort = true; }
253 else { qFileName = temp; m->setQualFile(qFileName); }
255 temp = validParameter.validFile(parameters, "name", true);
256 if (temp == "not found") { nameFile = ""; }
257 else if(temp == "not open") { nameFile = ""; abort = true; }
258 else { nameFile = temp; m->setNameFile(nameFile); }
260 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
261 m->mothurConvert(temp, qThreshold);
263 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
264 qtrim = m->isTrue(temp);
266 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
267 convert(temp, qRollAverage);
269 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270 convert(temp, qWindowAverage);
272 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
273 convert(temp, qWindowSize);
275 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
276 convert(temp, qWindowStep);
278 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
279 convert(temp, qAverage);
281 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
282 convert(temp, keepFirst);
284 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
285 convert(temp, removeLast);
287 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
288 allFiles = m->isTrue(temp);
290 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
291 keepforward = m->isTrue(temp);
293 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
294 m->setProcessors(temp);
295 m->mothurConvert(temp, processors);
298 if(allFiles && (oligoFile == "")){
299 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
301 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
306 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
307 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
311 if (nameFile == "") {
312 vector<string> files; files.push_back(fastaFile);
313 parser.getNameFile(files);
318 catch(exception& e) {
319 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
323 //***************************************************************************************************************
325 int TrimSeqsCommand::execute(){
328 if (abort == true) { if (calledHelp) { return 0; } return 2; }
330 numFPrimers = 0; //this needs to be initialized
335 vector<vector<string> > fastaFileNames;
336 vector<vector<string> > qualFileNames;
337 vector<vector<string> > nameFileNames;
339 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
340 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
342 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
343 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
345 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
346 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
348 if (qFileName != "") {
349 outputNames.push_back(trimQualFile);
350 outputNames.push_back(scrapQualFile);
351 outputTypes["qfile"].push_back(trimQualFile);
352 outputTypes["qfile"].push_back(scrapQualFile);
355 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
356 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
358 if (nameFile != "") {
359 m->readNames(nameFile, nameMap);
360 outputNames.push_back(trimNameFile);
361 outputNames.push_back(scrapNameFile);
362 outputTypes["name"].push_back(trimNameFile);
363 outputTypes["name"].push_back(scrapNameFile);
366 if (m->control_pressed) { return 0; }
368 string outputGroupFileName;
370 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
372 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
373 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
377 //fills lines and qlines
378 setLines(fastaFile, qFileName);
381 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
383 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
387 if (m->control_pressed) { return 0; }
390 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391 map<string, string>::iterator it;
392 set<string> namesToRemove;
393 for(int i=0;i<fastaFileNames.size();i++){
394 for(int j=0;j<fastaFileNames[0].size();j++){
395 if (fastaFileNames[i][j] != "") {
396 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
397 if(m->isBlank(fastaFileNames[i][j])){
398 m->mothurRemove(fastaFileNames[i][j]);
399 namesToRemove.insert(fastaFileNames[i][j]);
402 m->mothurRemove(qualFileNames[i][j]);
403 namesToRemove.insert(qualFileNames[i][j]);
407 m->mothurRemove(nameFileNames[i][j]);
408 namesToRemove.insert(nameFileNames[i][j]);
411 it = uniqueFastaNames.find(fastaFileNames[i][j]);
412 if (it == uniqueFastaNames.end()) {
413 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
421 //remove names for outputFileNames, just cleans up the output
422 vector<string> outputNames2;
423 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
424 outputNames = outputNames2;
426 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
428 m->openInputFile(it->first, in);
431 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
432 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
433 m->openOutputFile(thisGroupName, out);
436 if (m->control_pressed) { break; }
438 Sequence currSeq(in); m->gobble(in);
439 out << currSeq.getName() << '\t' << it->second << endl;
441 if (nameFile != "") {
442 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
443 if (itName != nameMap.end()) {
444 vector<string> thisSeqsNames;
445 m->splitAtChar(itName->second, thisSeqsNames, ',');
446 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
447 out << thisSeqsNames[k] << '\t' << it->second << endl;
449 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
457 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
459 //output group counts
460 m->mothurOutEndLine();
462 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
463 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
464 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
466 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
468 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
470 //set fasta file as new current fastafile
472 itTypes = outputTypes.find("fasta");
473 if (itTypes != outputTypes.end()) {
474 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
477 itTypes = outputTypes.find("name");
478 if (itTypes != outputTypes.end()) {
479 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
482 itTypes = outputTypes.find("qfile");
483 if (itTypes != outputTypes.end()) {
484 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
487 itTypes = outputTypes.find("group");
488 if (itTypes != outputTypes.end()) {
489 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
492 m->mothurOutEndLine();
493 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
494 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
495 m->mothurOutEndLine();
500 catch(exception& e) {
501 m->errorOut(e, "TrimSeqsCommand", "execute");
506 /**************************************************************************************/
508 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
512 ofstream trimFASTAFile;
513 m->openOutputFile(trimFileName, trimFASTAFile);
515 ofstream scrapFASTAFile;
516 m->openOutputFile(scrapFileName, scrapFASTAFile);
518 ofstream trimQualFile;
519 ofstream scrapQualFile;
521 m->openOutputFile(trimQFileName, trimQualFile);
522 m->openOutputFile(scrapQFileName, scrapQualFile);
525 ofstream trimNameFile;
526 ofstream scrapNameFile;
528 m->openOutputFile(trimNFileName, trimNameFile);
529 m->openOutputFile(scrapNFileName, scrapNameFile);
533 ofstream outGroupsFile;
534 if (createGroup){ m->openOutputFile(groupFileName, outGroupsFile); }
536 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
537 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
538 if (fastaFileNames[i][j] != "") {
540 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
542 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
546 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
554 m->openInputFile(filename, inFASTA);
555 inFASTA.seekg(line.start);
558 if(qFileName != "") {
559 m->openInputFile(qFileName, qFile);
560 qFile.seekg(qline.start);
565 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
569 if (m->control_pressed) {
570 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
571 if (createGroup) { outGroupsFile.close(); }
576 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
582 string trashCode = "";
583 int currentSeqsDiffs = 0;
585 Sequence currSeq(inFASTA); m->gobble(inFASTA);
586 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
587 QualityScores currQual;
589 currQual = QualityScores(qFile); m->gobble(qFile);
592 string origSeq = currSeq.getUnaligned();
595 int barcodeIndex = 0;
599 success = trimOligos.stripLinker(currSeq, currQual);
600 if(success > ldiffs) { trashCode += 'k'; }
601 else{ currentSeqsDiffs += success; }
605 if(barcodes.size() != 0){
606 success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
607 if(success > bdiffs) { trashCode += 'b'; }
608 else{ currentSeqsDiffs += success; }
612 success = trimOligos.stripSpacer(currSeq, currQual);
613 if(success > sdiffs) { trashCode += 's'; }
614 else{ currentSeqsDiffs += success; }
618 if(numFPrimers != 0){
619 success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
620 if(success > pdiffs) { trashCode += 'f'; }
621 else{ currentSeqsDiffs += success; }
624 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
626 if(numRPrimers != 0){
627 success = trimOligos.stripReverse(currSeq, currQual);
628 if(!success) { trashCode += 'r'; }
632 success = keepFirstTrim(currSeq, currQual);
636 success = removeLastTrim(currSeq, currQual);
637 if(!success) { trashCode += 'l'; }
642 int origLength = currSeq.getNumBases();
644 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
645 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
646 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
647 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
648 else { success = 1; }
650 //you don't want to trim, if it fails above then scrap it
651 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
653 if(!success) { trashCode += 'q'; }
656 if(minLength > 0 || maxLength > 0){
657 success = cullLength(currSeq);
658 if(!success) { trashCode += 'l'; }
661 success = cullHomoP(currSeq);
662 if(!success) { trashCode += 'h'; }
665 success = cullAmbigs(currSeq);
666 if(!success) { trashCode += 'n'; }
669 if(flip){ // should go last
670 currSeq.reverseComplement();
672 currQual.flipQScores();
676 if(trashCode.length() == 0){
677 currSeq.setAligned(currSeq.getUnaligned());
678 currSeq.printSequence(trimFASTAFile);
681 currQual.printQScores(trimQualFile);
685 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
686 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
687 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
691 if(barcodes.size() != 0){
692 string thisGroup = barcodeNameVector[barcodeIndex];
693 if (primers.size() != 0) {
694 if (primerNameVector[primerIndex] != "") {
695 if(thisGroup != "") {
696 thisGroup += "." + primerNameVector[primerIndex];
698 thisGroup = primerNameVector[primerIndex];
703 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
705 if (nameFile != "") {
706 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
707 if (itName != nameMap.end()) {
708 vector<string> thisSeqsNames;
709 m->splitAtChar(itName->second, thisSeqsNames, ',');
710 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
711 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
713 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
716 map<string, int>::iterator it = groupCounts.find(thisGroup);
717 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
718 else { groupCounts[it->first]++; }
725 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
726 currSeq.printSequence(output);
730 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
731 currQual.printQScores(output);
736 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
737 if (itName != nameMap.end()) {
738 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
739 output << itName->first << '\t' << itName->second << endl;
741 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
746 if(nameFile != ""){ //needs to be before the currSeq name is changed
747 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
748 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
749 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
751 currSeq.setName(currSeq.getName() + '|' + trashCode);
752 currSeq.setUnaligned(origSeq);
753 currSeq.setAligned(origSeq);
754 currSeq.printSequence(scrapFASTAFile);
756 currQual.printQScores(scrapQualFile);
762 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
763 unsigned long long pos = inFASTA.tellg();
764 if ((pos == -1) || (pos >= line.end)) { break; }
767 if (inFASTA.eof()) { break; }
771 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
775 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
779 trimFASTAFile.close();
780 scrapFASTAFile.close();
781 if (createGroup) { outGroupsFile.close(); }
782 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
783 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
787 catch(exception& e) {
788 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
793 /**************************************************************************************************/
795 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
802 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
803 //loop through and create all the processes you want
804 while (process != processors) {
808 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
812 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
813 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
814 vector<vector<string> > tempNameFileNames = nameFileNames;
819 for(int i=0;i<tempFASTAFileNames.size();i++){
820 for(int j=0;j<tempFASTAFileNames[i].size();j++){
821 if (tempFASTAFileNames[i][j] != "") {
822 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
823 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
826 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
827 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
830 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
831 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
838 driverCreateTrim(filename,
840 (trimFASTAFileName + toString(getpid()) + ".temp"),
841 (scrapFASTAFileName + toString(getpid()) + ".temp"),
842 (trimQualFileName + toString(getpid()) + ".temp"),
843 (scrapQualFileName + toString(getpid()) + ".temp"),
844 (trimNameFileName + toString(getpid()) + ".temp"),
845 (scrapNameFileName + toString(getpid()) + ".temp"),
846 (groupFile + toString(getpid()) + ".temp"),
848 tempPrimerQualFileNames,
853 //pass groupCounts to parent
856 string tempFile = filename + toString(getpid()) + ".num.temp";
857 m->openOutputFile(tempFile, out);
859 out << groupCounts.size() << endl;
861 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
862 out << it->first << '\t' << it->second << endl;
868 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
869 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
876 m->openOutputFile(trimFASTAFileName, temp); temp.close();
877 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
879 m->openOutputFile(trimQualFileName, temp); temp.close();
880 m->openOutputFile(scrapQualFileName, temp); temp.close();
882 if (nameFile != "") {
883 m->openOutputFile(trimNameFileName, temp); temp.close();
884 m->openOutputFile(scrapNameFileName, temp); temp.close();
887 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
889 //force parent to wait until all the processes are done
890 for (int i=0;i<processIDS.size();i++) {
891 int temp = processIDS[i];
895 //////////////////////////////////////////////////////////////////////////////////////////////////////
896 //Windows version shared memory, so be careful when passing variables through the trimData struct.
897 //Above fork() will clone, so memory is separate, but that's not the case with windows,
898 //////////////////////////////////////////////////////////////////////////////////////////////////////
900 vector<trimData*> pDataArray;
901 DWORD dwThreadIdArray[processors-1];
902 HANDLE hThreadArray[processors-1];
904 //Create processor worker threads.
905 for( int i=0; i<processors-1; i++){
907 string extension = "";
908 if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
909 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
910 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
911 vector<vector<string> > tempNameFileNames = nameFileNames;
916 for(int i=0;i<tempFASTAFileNames.size();i++){
917 for(int j=0;j<tempFASTAFileNames[i].size();j++){
918 if (tempFASTAFileNames[i][j] != "") {
919 tempFASTAFileNames[i][j] += extension;
920 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
923 tempPrimerQualFileNames[i][j] += extension;
924 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
927 tempNameFileNames[i][j] += extension;
928 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
936 trimData* tempTrim = new trimData(filename,
938 (trimFASTAFileName+extension),
939 (scrapFASTAFileName+extension),
940 (trimQualFileName+extension),
941 (scrapQualFileName+extension),
942 (trimNameFileName+extension),
943 (scrapNameFileName+extension),
944 (groupFile+extension),
946 tempPrimerQualFileNames,
948 lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
949 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer,
950 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
951 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
952 minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
953 pDataArray.push_back(tempTrim);
955 hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
960 m->openOutputFile(trimFASTAFileName, temp); temp.close();
961 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
963 m->openOutputFile(trimQualFileName, temp); temp.close();
964 m->openOutputFile(scrapQualFileName, temp); temp.close();
966 if (nameFile != "") {
967 m->openOutputFile(trimNameFileName, temp); temp.close();
968 m->openOutputFile(scrapNameFileName, temp); temp.close();
971 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
972 processIDS.push_back(processors-1);
975 //Wait until all threads have terminated.
976 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
978 //Close all thread handles and free memory allocations.
979 for(int i=0; i < pDataArray.size(); i++){
980 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
981 map<string, int>::iterator it2 = groupCounts.find(it->first);
982 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
983 else { groupCounts[it->first] += it->second; }
985 CloseHandle(hThreadArray[i]);
986 delete pDataArray[i];
993 for(int i=0;i<processIDS.size();i++){
995 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
997 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
998 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
999 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1000 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1002 if(qFileName != ""){
1003 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1004 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1005 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1006 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1010 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1011 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1012 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1013 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1017 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1018 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1023 for(int j=0;j<fastaFileNames.size();j++){
1024 for(int k=0;k<fastaFileNames[j].size();k++){
1025 if (fastaFileNames[j][k] != "") {
1026 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1027 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1029 if(qFileName != ""){
1030 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1031 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1035 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1036 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1043 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1046 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1047 m->openInputFile(tempFile, in);
1051 in >> tempNum; m->gobble(in);
1055 in >> group >> tempNum; m->gobble(in);
1057 map<string, int>::iterator it = groupCounts.find(group);
1058 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
1059 else { groupCounts[it->first] += tempNum; }
1062 in.close(); m->mothurRemove(tempFile);
1069 catch(exception& e) {
1070 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1075 /**************************************************************************************************/
1077 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1080 vector<unsigned long long> fastaFilePos;
1081 vector<unsigned long long> qfileFilePos;
1083 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1084 //set file positions for fasta file
1085 fastaFilePos = m->divideFile(filename, processors);
1087 //get name of first sequence in each chunk
1088 map<string, int> firstSeqNames;
1089 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1091 m->openInputFile(filename, in);
1092 in.seekg(fastaFilePos[i]);
1095 firstSeqNames[temp.getName()] = i;
1100 if(qfilename != "") {
1101 //seach for filePos of each first name in the qfile and save in qfileFilePos
1103 m->openInputFile(qfilename, inQual);
1106 while(!inQual.eof()){
1107 input = m->getline(inQual);
1109 if (input.length() != 0) {
1110 if(input[0] == '>'){ //this is a sequence name line
1111 istringstream nameStream(input);
1113 string sname = ""; nameStream >> sname;
1114 sname = sname.substr(1);
1116 map<string, int>::iterator it = firstSeqNames.find(sname);
1118 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1119 unsigned long long pos = inQual.tellg();
1120 qfileFilePos.push_back(pos - input.length() - 1);
1121 firstSeqNames.erase(it);
1126 if (firstSeqNames.size() == 0) { break; }
1131 if (firstSeqNames.size() != 0) {
1132 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1133 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1139 //get last file position of qfile
1141 unsigned long long size;
1143 //get num bytes in file
1144 pFile = fopen (qfilename.c_str(),"rb");
1145 if (pFile==NULL) perror ("Error opening file");
1147 fseek (pFile, 0, SEEK_END);
1152 qfileFilePos.push_back(size);
1155 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1156 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1157 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1159 if(qfilename == "") { qLines = lines; } //files with duds
1165 if (processors == 1) { //save time
1166 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1167 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1168 lines.push_back(linePair(0, 1000));
1169 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1171 int numFastaSeqs = 0;
1172 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1173 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1175 if (qfilename != "") {
1176 int numQualSeqs = 0;
1177 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1179 if (numFastaSeqs != numQualSeqs) {
1180 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1184 //figure out how many sequences you have to process
1185 int numSeqsPerProcessor = numFastaSeqs / processors;
1186 for (int i = 0; i < processors; i++) {
1187 int startIndex = i * numSeqsPerProcessor;
1188 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1189 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1190 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1191 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1194 if(qfilename == "") { qLines = lines; } //files with duds
1199 catch(exception& e) {
1200 m->errorOut(e, "TrimSeqsCommand", "setLines");
1205 //***************************************************************************************************************
1207 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1210 m->openInputFile(oligoFile, inOligos);
1214 string type, oligo, group;
1216 int indexPrimer = 0;
1217 int indexBarcode = 0;
1219 while(!inOligos.eof()){
1224 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1225 m->gobble(inOligos);
1228 m->gobble(inOligos);
1229 //make type case insensitive
1230 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1234 for(int i=0;i<oligo.length();i++){
1235 oligo[i] = toupper(oligo[i]);
1236 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1239 if(type == "FORWARD"){
1242 // get rest of line in case there is a primer name
1243 while (!inOligos.eof()) {
1244 char c = inOligos.get();
1245 if (c == 10 || c == 13){ break; }
1246 else if (c == 32 || c == 9){;} //space or tab
1247 else { group += c; }
1250 //check for repeat barcodes
1251 map<string, int>::iterator itPrime = primers.find(oligo);
1252 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1254 primers[oligo]=indexPrimer; indexPrimer++;
1255 primerNameVector.push_back(group);
1257 else if(type == "REVERSE"){
1258 //Sequence oligoRC("reverse", oligo);
1259 //oligoRC.reverseComplement();
1260 string oligoRC = reverseOligo(oligo);
1261 revPrimer.push_back(oligoRC);
1263 else if(type == "BARCODE"){
1266 //check for repeat barcodes
1267 map<string, int>::iterator itBar = barcodes.find(oligo);
1268 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1270 barcodes[oligo]=indexBarcode; indexBarcode++;
1271 barcodeNameVector.push_back(group);
1272 }else if(type == "LINKER"){
1273 linker.push_back(oligo);
1274 }else if(type == "SPACER"){
1275 spacer.push_back(oligo);
1277 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1279 m->gobble(inOligos);
1283 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1285 //add in potential combos
1286 if(barcodeNameVector.size() == 0){
1288 barcodeNameVector.push_back("");
1291 if(primerNameVector.size() == 0){
1293 primerNameVector.push_back("");
1296 fastaFileNames.resize(barcodeNameVector.size());
1297 for(int i=0;i<fastaFileNames.size();i++){
1298 fastaFileNames[i].assign(primerNameVector.size(), "");
1300 if(qFileName != "") { qualFileNames = fastaFileNames; }
1301 if(nameFile != "") { nameFileNames = fastaFileNames; }
1304 set<string> uniqueNames; //used to cleanup outputFileNames
1305 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1306 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1308 string primerName = primerNameVector[itPrimer->second];
1309 string barcodeName = barcodeNameVector[itBar->second];
1311 string comboGroupName = "";
1312 string fastaFileName = "";
1313 string qualFileName = "";
1314 string nameFileName = "";
1316 if(primerName == ""){
1317 comboGroupName = barcodeNameVector[itBar->second];
1320 if(barcodeName == ""){
1321 comboGroupName = primerNameVector[itPrimer->second];
1324 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1330 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1331 if (uniqueNames.count(fastaFileName) == 0) {
1332 outputNames.push_back(fastaFileName);
1333 outputTypes["fasta"].push_back(fastaFileName);
1334 uniqueNames.insert(fastaFileName);
1337 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1338 m->openOutputFile(fastaFileName, temp); temp.close();
1340 if(qFileName != ""){
1341 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1342 if (uniqueNames.count(qualFileName) == 0) {
1343 outputNames.push_back(qualFileName);
1344 outputTypes["qfile"].push_back(qualFileName);
1347 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1348 m->openOutputFile(qualFileName, temp); temp.close();
1352 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1353 if (uniqueNames.count(nameFileName) == 0) {
1354 outputNames.push_back(nameFileName);
1355 outputTypes["name"].push_back(nameFileName);
1358 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1359 m->openOutputFile(nameFileName, temp); temp.close();
1365 numFPrimers = primers.size();
1366 numRPrimers = revPrimer.size();
1367 numLinkers = linker.size();
1368 numSpacers = spacer.size();
1370 bool allBlank = true;
1371 for (int i = 0; i < barcodeNameVector.size(); i++) {
1372 if (barcodeNameVector[i] != "") {
1377 for (int i = 0; i < primerNameVector.size(); i++) {
1378 if (primerNameVector[i] != "") {
1385 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1393 catch(exception& e) {
1394 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1398 //***************************************************************************************************************
1400 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1403 if(qscores.getName() != ""){
1404 qscores.trimQScores(-1, keepFirst);
1406 sequence.trim(keepFirst);
1409 catch(exception& e) {
1410 m->errorOut(e, "keepFirstTrim", "countDiffs");
1416 //***************************************************************************************************************
1418 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1422 int length = sequence.getNumBases() - removeLast;
1425 if(qscores.getName() != ""){
1426 qscores.trimQScores(-1, length);
1428 sequence.trim(length);
1437 catch(exception& e) {
1438 m->errorOut(e, "removeLastTrim", "countDiffs");
1444 //***************************************************************************************************************
1446 bool TrimSeqsCommand::cullLength(Sequence& seq){
1449 int length = seq.getNumBases();
1450 bool success = 0; //guilty until proven innocent
1452 if(length >= minLength && maxLength == 0) { success = 1; }
1453 else if(length >= minLength && length <= maxLength) { success = 1; }
1454 else { success = 0; }
1459 catch(exception& e) {
1460 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1466 //***************************************************************************************************************
1468 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1470 int longHomoP = seq.getLongHomoPolymer();
1471 bool success = 0; //guilty until proven innocent
1473 if(longHomoP <= maxHomoP){ success = 1; }
1474 else { success = 0; }
1478 catch(exception& e) {
1479 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1484 //********************************************************************/
1485 string TrimSeqsCommand::reverseOligo(string oligo){
1487 string reverse = "";
1489 for(int i=oligo.length()-1;i>=0;i--){
1491 if(oligo[i] == 'A') { reverse += 'T'; }
1492 else if(oligo[i] == 'T'){ reverse += 'A'; }
1493 else if(oligo[i] == 'U'){ reverse += 'A'; }
1495 else if(oligo[i] == 'G'){ reverse += 'C'; }
1496 else if(oligo[i] == 'C'){ reverse += 'G'; }
1498 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1499 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1501 else if(oligo[i] == 'M'){ reverse += 'K'; }
1502 else if(oligo[i] == 'K'){ reverse += 'M'; }
1504 else if(oligo[i] == 'W'){ reverse += 'W'; }
1505 else if(oligo[i] == 'S'){ reverse += 'S'; }
1507 else if(oligo[i] == 'B'){ reverse += 'V'; }
1508 else if(oligo[i] == 'V'){ reverse += 'B'; }
1510 else if(oligo[i] == 'D'){ reverse += 'H'; }
1511 else if(oligo[i] == 'H'){ reverse += 'D'; }
1513 else { reverse += 'N'; }
1519 catch(exception& e) {
1520 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1525 //***************************************************************************************************************
1527 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1529 int numNs = seq.getAmbigBases();
1530 bool success = 0; //guilty until proven innocent
1532 if(numNs <= maxAmbig) { success = 1; }
1533 else { success = 0; }
1537 catch(exception& e) {
1538 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1543 //***************************************************************************************************************