5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "TrimSeqsCommand", "setParameters");
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65 helpString += "The fasta parameter is required.\n";
66 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67 helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80 helpString += "The qfile parameter allows you to provide a quality file.\n";
81 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92 helpString += "The trim.seqs command should be in the following format: \n";
93 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
94 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
95 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
100 catch(exception& e) {
101 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
110 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
111 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
112 else if (type == "group") { pattern = "[filename],groups"; }
113 else if (type == "name") { pattern = "[filename],[tag],names"; }
114 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
124 //**********************************************************************************************************************
126 TrimSeqsCommand::TrimSeqsCommand(){
128 abort = true; calledHelp = true;
130 vector<string> tempOutNames;
131 outputTypes["fasta"] = tempOutNames;
132 outputTypes["qfile"] = tempOutNames;
133 outputTypes["group"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["count"] = tempOutNames;
137 catch(exception& e) {
138 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
142 //***************************************************************************************************************
144 TrimSeqsCommand::TrimSeqsCommand(string option) {
147 abort = false; calledHelp = false;
150 //allow user to run help
151 if(option == "help") { help(); abort = true; calledHelp = true; }
152 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
155 vector<string> myArray = setParameters();
157 OptionParser parser(option);
158 map<string,string> parameters = parser.getParameters();
160 ValidParameters validParameter;
161 map<string,string>::iterator it;
163 //check to make sure all parameters are valid for command
164 for (it = parameters.begin(); it != parameters.end(); it++) {
165 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
168 //initialize outputTypes
169 vector<string> tempOutNames;
170 outputTypes["fasta"] = tempOutNames;
171 outputTypes["qfile"] = tempOutNames;
172 outputTypes["group"] = tempOutNames;
173 outputTypes["name"] = tempOutNames;
174 outputTypes["count"] = tempOutNames;
176 //if the user changes the input directory command factory will send this info to us in the output parameter
177 string inputDir = validParameter.validFile(parameters, "inputdir", false);
178 if (inputDir == "not found"){ inputDir = ""; }
181 it = parameters.find("fasta");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["fasta"] = inputDir + it->second; }
189 it = parameters.find("oligos");
190 //user has given a template file
191 if(it != parameters.end()){
192 path = m->hasPath(it->second);
193 //if the user has not given a path then, add inputdir. else leave path alone.
194 if (path == "") { parameters["oligos"] = inputDir + it->second; }
197 it = parameters.find("qfile");
198 //user has given a template file
199 if(it != parameters.end()){
200 path = m->hasPath(it->second);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { parameters["qfile"] = inputDir + it->second; }
205 it = parameters.find("name");
206 //user has given a template file
207 if(it != parameters.end()){
208 path = m->hasPath(it->second);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { parameters["name"] = inputDir + it->second; }
213 it = parameters.find("count");
214 //user has given a template file
215 if(it != parameters.end()){
216 path = m->hasPath(it->second);
217 //if the user has not given a path then, add inputdir. else leave path alone.
218 if (path == "") { parameters["count"] = inputDir + it->second; }
224 //check for required parameters
225 fastaFile = validParameter.validFile(parameters, "fasta", true);
226 if (fastaFile == "not found") {
227 fastaFile = m->getFastaFile();
228 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230 }else if (fastaFile == "not open") { abort = true; }
231 else { m->setFastaFile(fastaFile); }
233 //if the user changes the output directory command factory will send this info to us in the output parameter
234 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
236 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
240 //check for optional parameter and set defaults
241 // ...at some point should added some additional type checking...
243 temp = validParameter.validFile(parameters, "flip", false);
244 if (temp == "not found") { flip = 0; }
245 else { flip = m->isTrue(temp); }
247 temp = validParameter.validFile(parameters, "oligos", true);
248 if (temp == "not found"){ oligoFile = ""; }
249 else if(temp == "not open"){ abort = true; }
250 else { oligoFile = temp; m->setOligosFile(oligoFile); }
253 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
254 m->mothurConvert(temp, maxAmbig);
256 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
257 m->mothurConvert(temp, maxHomoP);
259 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
260 m->mothurConvert(temp, minLength);
262 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
263 m->mothurConvert(temp, maxLength);
265 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
266 m->mothurConvert(temp, bdiffs);
268 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
269 m->mothurConvert(temp, pdiffs);
271 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
272 m->mothurConvert(temp, ldiffs);
274 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
275 m->mothurConvert(temp, sdiffs);
277 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
278 m->mothurConvert(temp, tdiffs);
280 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
282 temp = validParameter.validFile(parameters, "qfile", true);
283 if (temp == "not found") { qFileName = ""; }
284 else if(temp == "not open") { abort = true; }
285 else { qFileName = temp; m->setQualFile(qFileName); }
287 temp = validParameter.validFile(parameters, "name", true);
288 if (temp == "not found") { nameFile = ""; }
289 else if(temp == "not open") { nameFile = ""; abort = true; }
290 else { nameFile = temp; m->setNameFile(nameFile); }
292 countfile = validParameter.validFile(parameters, "count", true);
293 if (countfile == "not open") { abort = true; countfile = ""; }
294 else if (countfile == "not found") { countfile = ""; }
295 else { m->setCountTableFile(countfile); }
297 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
299 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
300 m->mothurConvert(temp, qThreshold);
302 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
303 qtrim = m->isTrue(temp);
305 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
306 convert(temp, qRollAverage);
308 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309 convert(temp, qWindowAverage);
311 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
312 convert(temp, qWindowSize);
314 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
315 convert(temp, qWindowStep);
317 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
318 convert(temp, qAverage);
320 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
321 convert(temp, keepFirst);
323 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
324 convert(temp, removeLast);
326 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
327 allFiles = m->isTrue(temp);
329 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
330 keepforward = m->isTrue(temp);
332 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
333 reorient = m->isTrue(temp);
335 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
336 m->setProcessors(temp);
337 m->mothurConvert(temp, processors);
340 if(allFiles && (oligoFile == "")){
341 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
343 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
348 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
349 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
353 if (countfile == "") {
354 if (nameFile == "") {
355 vector<string> files; files.push_back(fastaFile);
356 parser.getNameFile(files);
362 catch(exception& e) {
363 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
367 //***************************************************************************************************************
369 int TrimSeqsCommand::execute(){
372 if (abort == true) { if (calledHelp) { return 0; } return 2; }
374 pairedOligos = false;
375 numFPrimers = 0; //this needs to be initialized
380 vector<vector<string> > fastaFileNames;
381 vector<vector<string> > qualFileNames;
382 vector<vector<string> > nameFileNames;
384 map<string, string> variables;
385 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386 variables["[tag]"] = "trim";
387 string trimSeqFile = getOutputFileName("fasta",variables);
388 string trimQualFile = getOutputFileName("qfile",variables);
389 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
391 variables["[tag]"] = "scrap";
392 string scrapSeqFile = getOutputFileName("fasta",variables);
393 string scrapQualFile = getOutputFileName("qfile",variables);
394 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
396 if (qFileName != "") {
397 outputNames.push_back(trimQualFile);
398 outputNames.push_back(scrapQualFile);
399 outputTypes["qfile"].push_back(trimQualFile);
400 outputTypes["qfile"].push_back(scrapQualFile);
403 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404 variables["[tag]"] = "trim";
405 string trimNameFile = getOutputFileName("name",variables);
406 variables["[tag]"] = "scrap";
407 string scrapNameFile = getOutputFileName("name",variables);
409 if (nameFile != "") {
410 m->readNames(nameFile, nameMap);
411 outputNames.push_back(trimNameFile);
412 outputNames.push_back(scrapNameFile);
413 outputTypes["name"].push_back(trimNameFile);
414 outputTypes["name"].push_back(scrapNameFile);
417 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418 variables["[tag]"] = "trim";
419 string trimCountFile = getOutputFileName("count",variables);
420 variables["[tag]"] = "scrap";
421 string scrapCountFile = getOutputFileName("count",variables);
423 if (countfile != "") {
425 ct.readTable(countfile);
426 nameCount = ct.getNameMap();
427 outputNames.push_back(trimCountFile);
428 outputNames.push_back(scrapCountFile);
429 outputTypes["count"].push_back(trimCountFile);
430 outputTypes["count"].push_back(scrapCountFile);
434 if (m->control_pressed) { return 0; }
436 string outputGroupFileName;
438 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439 if ((createGroup) && (countfile == "")){
440 map<string, string> myvariables;
441 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442 outputGroupFileName = getOutputFileName("group",myvariables);
443 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
447 if (m->control_pressed) { return 0; }
449 //fills lines and qlines
450 setLines(fastaFile, qFileName);
453 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
455 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
459 if (m->control_pressed) { return 0; }
462 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
463 map<string, string>::iterator it;
464 set<string> namesToRemove;
465 for(int i=0;i<fastaFileNames.size();i++){
466 for(int j=0;j<fastaFileNames[0].size();j++){
467 if (fastaFileNames[i][j] != "") {
468 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
469 if(m->isBlank(fastaFileNames[i][j])){
470 m->mothurRemove(fastaFileNames[i][j]);
471 namesToRemove.insert(fastaFileNames[i][j]);
474 m->mothurRemove(qualFileNames[i][j]);
475 namesToRemove.insert(qualFileNames[i][j]);
479 m->mothurRemove(nameFileNames[i][j]);
480 namesToRemove.insert(nameFileNames[i][j]);
483 it = uniqueFastaNames.find(fastaFileNames[i][j]);
484 if (it == uniqueFastaNames.end()) {
485 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
493 //remove names for outputFileNames, just cleans up the output
494 vector<string> outputNames2;
495 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
496 outputNames = outputNames2;
498 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
500 m->openInputFile(it->first, in);
503 map<string, string> myvariables;
504 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
505 string thisGroupName = "";
506 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
507 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
508 m->openOutputFile(thisGroupName, out);
510 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
513 if (m->control_pressed) { break; }
515 Sequence currSeq(in); m->gobble(in);
516 if (countfile == "") {
517 out << currSeq.getName() << '\t' << it->second << endl;
519 if (nameFile != "") {
520 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
521 if (itName != nameMap.end()) {
522 vector<string> thisSeqsNames;
523 m->splitAtChar(itName->second, thisSeqsNames, ',');
524 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
525 out << thisSeqsNames[k] << '\t' << it->second << endl;
527 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
530 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
531 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
532 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
539 if (countfile != "") { //create countfile with group info included
540 CountTable* ct = new CountTable();
541 ct->readTable(trimCountFile);
542 map<string, int> justTrimmedNames = ct->getNameMap();
546 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
547 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
548 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
549 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
550 map<string, string>::iterator it2 = groupMap.find(itNames->first);
551 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
552 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
554 newCt.printTable(trimCountFile);
558 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
560 //output group counts
561 m->mothurOutEndLine();
563 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
564 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
565 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
567 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
569 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
571 //set fasta file as new current fastafile
573 itTypes = outputTypes.find("fasta");
574 if (itTypes != outputTypes.end()) {
575 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
578 itTypes = outputTypes.find("name");
579 if (itTypes != outputTypes.end()) {
580 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
583 itTypes = outputTypes.find("qfile");
584 if (itTypes != outputTypes.end()) {
585 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
588 itTypes = outputTypes.find("group");
589 if (itTypes != outputTypes.end()) {
590 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
593 itTypes = outputTypes.find("count");
594 if (itTypes != outputTypes.end()) {
595 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
598 m->mothurOutEndLine();
599 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
600 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
601 m->mothurOutEndLine();
606 catch(exception& e) {
607 m->errorOut(e, "TrimSeqsCommand", "execute");
612 /**************************************************************************************/
613 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
617 ofstream trimFASTAFile;
618 m->openOutputFile(trimFileName, trimFASTAFile);
620 ofstream scrapFASTAFile;
621 m->openOutputFile(scrapFileName, scrapFASTAFile);
623 ofstream trimQualFile;
624 ofstream scrapQualFile;
626 m->openOutputFile(trimQFileName, trimQualFile);
627 m->openOutputFile(scrapQFileName, scrapQualFile);
630 ofstream trimNameFile;
631 ofstream scrapNameFile;
633 m->openOutputFile(trimNFileName, trimNameFile);
634 m->openOutputFile(scrapNFileName, scrapNameFile);
637 ofstream trimCountFile;
638 ofstream scrapCountFile;
640 m->openOutputFile(trimCFileName, trimCountFile);
641 m->openOutputFile(scrapCFileName, scrapCountFile);
642 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
645 ofstream outGroupsFile;
646 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
648 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
649 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
650 if (fastaFileNames[i][j] != "") {
652 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
654 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
658 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
666 m->openInputFile(filename, inFASTA);
667 inFASTA.seekg(line.start);
670 if(qFileName != "") {
671 m->openInputFile(qFileName, qFile);
672 qFile.seekg(qline.start);
677 int numBarcodes = barcodes.size();
678 TrimOligos* trimOligos = NULL;
679 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
680 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
682 TrimOligos* rtrimOligos = NULL;
684 //create reoriented primer and barcode pairs
685 map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
686 for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
687 cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
688 cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
689 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
690 rpairedPrimers[it->first] = tempPair;
692 for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
693 cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
694 cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
695 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
696 rpairedBarcodes[it->first] = tempPair;
698 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
703 if (m->control_pressed) {
704 delete trimOligos; if (reorient) { delete rtrimOligos; }
705 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
706 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
707 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
708 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
709 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
710 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
714 string trashCode = "";
715 int currentSeqsDiffs = 0;
717 Sequence currSeq(inFASTA); m->gobble(inFASTA);
718 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
720 QualityScores currQual;
722 currQual = QualityScores(qFile); m->gobble(qFile);
723 //cout << currQual.getName() << endl;
726 string origSeq = currSeq.getUnaligned();
729 int barcodeIndex = 0;
733 success = trimOligos->stripLinker(currSeq, currQual);
734 if(success > ldiffs) { trashCode += 'k'; }
735 else{ currentSeqsDiffs += success; }
739 if(numBarcodes != 0){
740 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
741 if(success > bdiffs) { trashCode += 'b'; }
742 else{ currentSeqsDiffs += success; }
746 success = trimOligos->stripSpacer(currSeq, currQual);
747 if(success > sdiffs) { trashCode += 's'; }
748 else{ currentSeqsDiffs += success; }
752 if(numFPrimers != 0){
753 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
754 if(success > pdiffs) { trashCode += 'f'; }
755 else{ currentSeqsDiffs += success; }
758 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
760 if(numRPrimers != 0){
761 success = trimOligos->stripReverse(currSeq, currQual);
762 if(!success) { trashCode += 'r'; }
765 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
767 string thisTrashCode = "";
768 int thisCurrentSeqsDiffs = 0;
770 int thisBarcodeIndex = 0;
771 int thisPrimerIndex = 0;
773 if(numBarcodes != 0){
774 thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
775 if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
776 else{ thisCurrentSeqsDiffs += thisSuccess; }
779 if(numFPrimers != 0){
780 thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
781 if(thisSuccess > pdiffs) { thisTrashCode += 'f'; }
782 else{ thisCurrentSeqsDiffs += thisSuccess; }
785 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
787 if (thisTrashCode == "") {
788 trashCode = thisTrashCode;
789 success = thisSuccess;
790 currentSeqsDiffs = thisCurrentSeqsDiffs;
791 barcodeIndex = thisBarcodeIndex;
792 primerIndex = thisPrimerIndex;
793 currSeq.reverseComplement();
795 currQual.flipQScores();
801 success = keepFirstTrim(currSeq, currQual);
805 success = removeLastTrim(currSeq, currQual);
806 if(!success) { trashCode += 'l'; }
811 int origLength = currSeq.getNumBases();
813 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
814 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
815 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
816 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
817 else { success = 1; }
819 //you don't want to trim, if it fails above then scrap it
820 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
822 if(!success) { trashCode += 'q'; }
825 if(minLength > 0 || maxLength > 0){
826 success = cullLength(currSeq);
827 if(!success) { trashCode += 'l'; }
830 success = cullHomoP(currSeq);
831 if(!success) { trashCode += 'h'; }
834 success = cullAmbigs(currSeq);
835 if(!success) { trashCode += 'n'; }
838 if(flip){ // should go last
839 currSeq.reverseComplement();
841 currQual.flipQScores();
845 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
847 if(trashCode.length() == 0){
848 string thisGroup = "";
850 if(numBarcodes != 0){
851 thisGroup = barcodeNameVector[barcodeIndex];
852 if (numFPrimers != 0) {
853 if (primerNameVector[primerIndex] != "") {
854 if(thisGroup != "") {
855 thisGroup += "." + primerNameVector[primerIndex];
857 thisGroup = primerNameVector[primerIndex];
864 int pos = thisGroup.find("ignore");
865 if (pos == string::npos) {
866 currSeq.setAligned(currSeq.getUnaligned());
867 currSeq.printSequence(trimFASTAFile);
870 currQual.printQScores(trimQualFile);
875 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
876 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
877 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
880 int numRedundants = 0;
881 if (countfile != "") {
882 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
883 if (itCount != nameCount.end()) {
884 trimCountFile << itCount->first << '\t' << itCount->second << endl;
885 numRedundants = itCount->second-1;
886 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
890 if(numBarcodes != 0){
892 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
894 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
895 else { groupMap[currSeq.getName()] = thisGroup; }
897 if (nameFile != "") {
898 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
899 if (itName != nameMap.end()) {
900 vector<string> thisSeqsNames;
901 m->splitAtChar(itName->second, thisSeqsNames, ',');
902 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
903 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
904 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
906 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
909 map<string, int>::iterator it = groupCounts.find(thisGroup);
910 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
911 else { groupCounts[it->first] += (1 + numRedundants); }
918 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
919 currSeq.printSequence(output);
923 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
924 currQual.printQScores(output);
929 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
930 if (itName != nameMap.end()) {
931 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
932 output << itName->first << '\t' << itName->second << endl;
934 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
940 if(nameFile != ""){ //needs to be before the currSeq name is changed
941 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
942 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
943 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
945 if (countfile != "") {
946 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
947 if (itCount != nameCount.end()) {
948 trimCountFile << itCount->first << '\t' << itCount->second << endl;
949 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
952 currSeq.setName(currSeq.getName() + '|' + trashCode);
953 currSeq.setUnaligned(origSeq);
954 currSeq.setAligned(origSeq);
955 currSeq.printSequence(scrapFASTAFile);
957 currQual.printQScores(scrapQualFile);
963 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
964 unsigned long long pos = inFASTA.tellg();
965 if ((pos == -1) || (pos >= line.end)) { break; }
968 if (inFASTA.eof()) { break; }
972 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
976 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
979 if (reorient) { delete rtrimOligos; }
981 trimFASTAFile.close();
982 scrapFASTAFile.close();
983 if (createGroup) { outGroupsFile.close(); }
984 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
985 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
986 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
990 catch(exception& e) {
991 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
996 /**************************************************************************************************/
998 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
1002 int exitCommand = 1;
1005 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1006 //loop through and create all the processes you want
1007 while (process != processors) {
1011 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1013 }else if (pid == 0){
1015 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1016 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1017 vector<vector<string> > tempNameFileNames = nameFileNames;
1022 for(int i=0;i<tempFASTAFileNames.size();i++){
1023 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1024 if (tempFASTAFileNames[i][j] != "") {
1025 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1026 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1028 if(qFileName != ""){
1029 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1030 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1033 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1034 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1041 driverCreateTrim(filename,
1043 (trimFASTAFileName + toString(getpid()) + ".temp"),
1044 (scrapFASTAFileName + toString(getpid()) + ".temp"),
1045 (trimQualFileName + toString(getpid()) + ".temp"),
1046 (scrapQualFileName + toString(getpid()) + ".temp"),
1047 (trimNameFileName + toString(getpid()) + ".temp"),
1048 (scrapNameFileName + toString(getpid()) + ".temp"),
1049 (trimCountFileName + toString(getpid()) + ".temp"),
1050 (scrapCountFileName + toString(getpid()) + ".temp"),
1051 (groupFile + toString(getpid()) + ".temp"),
1053 tempPrimerQualFileNames,
1058 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1060 //pass groupCounts to parent
1063 string tempFile = filename + toString(getpid()) + ".num.temp";
1064 m->openOutputFile(tempFile, out);
1066 out << groupCounts.size() << endl;
1068 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1069 out << it->first << '\t' << it->second << endl;
1072 out << groupMap.size() << endl;
1073 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1074 out << it->first << '\t' << it->second << endl;
1080 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1081 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1088 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1089 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1090 if(qFileName != ""){
1091 m->openOutputFile(trimQualFileName, temp); temp.close();
1092 m->openOutputFile(scrapQualFileName, temp); temp.close();
1094 if (nameFile != "") {
1095 m->openOutputFile(trimNameFileName, temp); temp.close();
1096 m->openOutputFile(scrapNameFileName, temp); temp.close();
1098 if (countfile != "") {
1099 m->openOutputFile(trimCountFileName, temp); temp.close();
1100 m->openOutputFile(scrapCountFileName, temp); temp.close();
1103 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1105 //force parent to wait until all the processes are done
1106 for (int i=0;i<processIDS.size();i++) {
1107 int temp = processIDS[i];
1111 //////////////////////////////////////////////////////////////////////////////////////////////////////
1112 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1113 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1114 //////////////////////////////////////////////////////////////////////////////////////////////////////
1116 vector<trimData*> pDataArray;
1117 DWORD dwThreadIdArray[processors-1];
1118 HANDLE hThreadArray[processors-1];
1120 //Create processor worker threads.
1121 for( int h=0; h<processors-1; h++){
1123 string extension = "";
1124 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1125 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1126 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1127 vector<vector<string> > tempNameFileNames = nameFileNames;
1132 for(int i=0;i<tempFASTAFileNames.size();i++){
1133 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1134 if (tempFASTAFileNames[i][j] != "") {
1135 tempFASTAFileNames[i][j] += extension;
1136 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1138 if(qFileName != ""){
1139 tempPrimerQualFileNames[i][j] += extension;
1140 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1143 tempNameFileNames[i][j] += extension;
1144 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1152 trimData* tempTrim = new trimData(filename,
1153 qFileName, nameFile, countfile,
1154 (trimFASTAFileName+extension),
1155 (scrapFASTAFileName+extension),
1156 (trimQualFileName+extension),
1157 (scrapQualFileName+extension),
1158 (trimNameFileName+extension),
1159 (scrapNameFileName+extension),
1160 (trimCountFileName+extension),
1161 (scrapCountFileName+extension),
1162 (groupFile+extension),
1164 tempPrimerQualFileNames,
1166 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1167 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1168 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1169 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1170 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1171 pDataArray.push_back(tempTrim);
1173 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1178 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1179 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1180 if(qFileName != ""){
1181 m->openOutputFile(trimQualFileName, temp); temp.close();
1182 m->openOutputFile(scrapQualFileName, temp); temp.close();
1184 if (nameFile != "") {
1185 m->openOutputFile(trimNameFileName, temp); temp.close();
1186 m->openOutputFile(scrapNameFileName, temp); temp.close();
1188 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1189 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1190 vector<vector<string> > tempNameFileNames = nameFileNames;
1193 string extension = toString(processors-1) + ".temp";
1194 for(int i=0;i<tempFASTAFileNames.size();i++){
1195 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1196 if (tempFASTAFileNames[i][j] != "") {
1197 tempFASTAFileNames[i][j] += extension;
1198 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1200 if(qFileName != ""){
1201 tempPrimerQualFileNames[i][j] += extension;
1202 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1205 tempNameFileNames[i][j] += extension;
1206 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1213 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1214 processIDS.push_back(processors-1);
1217 //Wait until all threads have terminated.
1218 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1220 //Close all thread handles and free memory allocations.
1221 for(int i=0; i < pDataArray.size(); i++){
1222 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1223 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1225 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1226 map<string, int>::iterator it2 = groupCounts.find(it->first);
1227 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1228 else { groupCounts[it->first] += it->second; }
1230 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1231 map<string, string>::iterator it2 = groupMap.find(it->first);
1232 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1233 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1235 CloseHandle(hThreadArray[i]);
1236 delete pDataArray[i];
1243 for(int i=0;i<processIDS.size();i++){
1245 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1247 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1248 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1249 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1250 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1252 if(qFileName != ""){
1253 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1254 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1255 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1256 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1260 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1261 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1262 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1263 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1266 if(countfile != ""){
1267 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1268 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1269 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1270 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1273 if((createGroup)&&(countfile == "")){
1274 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1275 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1280 for(int j=0;j<fastaFileNames.size();j++){
1281 for(int k=0;k<fastaFileNames[j].size();k++){
1282 if (fastaFileNames[j][k] != "") {
1283 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1284 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1286 if(qFileName != ""){
1287 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1288 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1292 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1293 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1300 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1303 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1304 m->openInputFile(tempFile, in);
1308 in >> tempNum; m->gobble(in);
1311 for (int i = 0; i < tempNum; i++) {
1313 in >> group >> groupNum; m->gobble(in);
1315 map<string, int>::iterator it = groupCounts.find(group);
1316 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1317 else { groupCounts[it->first] += groupNum; }
1320 in >> tempNum; m->gobble(in);
1322 for (int i = 0; i < tempNum; i++) {
1323 string group, seqName;
1324 in >> seqName >> group; m->gobble(in);
1326 map<string, string>::iterator it = groupMap.find(seqName);
1327 if (it == groupMap.end()) { groupMap[seqName] = group; }
1328 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1332 in.close(); m->mothurRemove(tempFile);
1339 catch(exception& e) {
1340 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1345 /**************************************************************************************************/
1347 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1350 vector<unsigned long long> fastaFilePos;
1351 vector<unsigned long long> qfileFilePos;
1353 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1354 //set file positions for fasta file
1355 fastaFilePos = m->divideFile(filename, processors);
1357 //get name of first sequence in each chunk
1358 map<string, int> firstSeqNames;
1359 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1361 m->openInputFile(filename, in);
1362 in.seekg(fastaFilePos[i]);
1365 firstSeqNames[temp.getName()] = i;
1370 if(qfilename != "") {
1371 //seach for filePos of each first name in the qfile and save in qfileFilePos
1373 m->openInputFile(qfilename, inQual);
1376 while(!inQual.eof()){
1377 input = m->getline(inQual);
1379 if (input.length() != 0) {
1380 if(input[0] == '>'){ //this is a sequence name line
1381 istringstream nameStream(input);
1383 string sname = ""; nameStream >> sname;
1384 sname = sname.substr(1);
1386 for (int i = 0; i < sname.length(); i++) {
1387 if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
1390 map<string, int>::iterator it = firstSeqNames.find(sname);
1392 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1393 unsigned long long pos = inQual.tellg();
1394 qfileFilePos.push_back(pos - input.length() - 1);
1395 firstSeqNames.erase(it);
1400 if (firstSeqNames.size() == 0) { break; }
1405 if (firstSeqNames.size() != 0) {
1406 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1407 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1413 //get last file position of qfile
1415 unsigned long long size;
1417 //get num bytes in file
1418 pFile = fopen (qfilename.c_str(),"rb");
1419 if (pFile==NULL) perror ("Error opening file");
1421 fseek (pFile, 0, SEEK_END);
1426 qfileFilePos.push_back(size);
1429 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1430 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1431 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1432 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1434 if(qfilename == "") { qLines = lines; } //files with duds
1440 if (processors == 1) { //save time
1441 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1442 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1443 lines.push_back(linePair(0, 1000));
1444 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1446 int numFastaSeqs = 0;
1447 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1448 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1450 if (qfilename != "") {
1451 int numQualSeqs = 0;
1452 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1454 if (numFastaSeqs != numQualSeqs) {
1455 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1459 //figure out how many sequences you have to process
1460 int numSeqsPerProcessor = numFastaSeqs / processors;
1461 for (int i = 0; i < processors; i++) {
1462 int startIndex = i * numSeqsPerProcessor;
1463 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1464 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1465 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1468 if(qfilename == "") { qLines = lines; } //files with duds
1473 catch(exception& e) {
1474 m->errorOut(e, "TrimSeqsCommand", "setLines");
1479 //***************************************************************************************************************
1481 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1484 m->openInputFile(oligoFile, inOligos);
1488 string type, oligo, roligo, group;
1489 bool hasPrimer = false; bool hasPairedBarcodes = false;
1491 int indexPrimer = 0;
1492 int indexBarcode = 0;
1493 int indexPairedPrimer = 0;
1494 int indexPairedBarcode = 0;
1495 set<string> uniquePrimers;
1496 set<string> uniqueBarcodes;
1498 while(!inOligos.eof()){
1502 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1505 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1506 m->gobble(inOligos);
1509 m->gobble(inOligos);
1510 //make type case insensitive
1511 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1515 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1517 for(int i=0;i<oligo.length();i++){
1518 oligo[i] = toupper(oligo[i]);
1519 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1522 if(type == "FORWARD"){
1525 // get rest of line in case there is a primer name
1526 while (!inOligos.eof()) {
1527 char c = inOligos.get();
1528 if (c == 10 || c == 13){ break; }
1529 else if (c == 32 || c == 9){;} //space or tab
1530 else { group += c; }
1533 //check for repeat barcodes
1534 map<string, int>::iterator itPrime = primers.find(oligo);
1535 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1537 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1539 primers[oligo]=indexPrimer; indexPrimer++;
1540 primerNameVector.push_back(group);
1542 else if (type == "PRIMER"){
1543 m->gobble(inOligos);
1547 for(int i=0;i<roligo.length();i++){
1548 roligo[i] = toupper(roligo[i]);
1549 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1551 roligo = reverseOligo(roligo);
1555 // get rest of line in case there is a primer name
1556 while (!inOligos.eof()) {
1557 char c = inOligos.get();
1558 if (c == 10 || c == 13){ break; }
1559 else if (c == 32 || c == 9){;} //space or tab
1560 else { group += c; }
1563 oligosPair newPrimer(oligo, roligo);
1565 //check for repeat barcodes
1566 string tempPair = oligo+roligo;
1567 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1568 else { uniquePrimers.insert(tempPair); }
1570 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1572 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1573 primerNameVector.push_back(group);
1576 else if(type == "REVERSE"){
1577 //Sequence oligoRC("reverse", oligo);
1578 //oligoRC.reverseComplement();
1579 string oligoRC = reverseOligo(oligo);
1580 revPrimer.push_back(oligoRC);
1582 else if(type == "BARCODE"){
1585 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1586 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1589 while (!inOligos.eof()) {
1590 char c = inOligos.get();
1591 if (c == 10 || c == 13){ break; }
1592 else if (c == 32 || c == 9){;} //space or tab
1596 //then this is illumina data with 4 columns
1598 hasPairedBarcodes = true;
1599 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1602 for(int i=0;i<reverseBarcode.length();i++){
1603 reverseBarcode[i] = toupper(reverseBarcode[i]);
1604 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1607 oligosPair newPair(oligo, reverseOligo(reverseBarcode));
1609 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1611 //check for repeat barcodes
1612 string tempPair = oligo+reverseBarcode;
1613 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1614 else { uniqueBarcodes.insert(tempPair); }
1616 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1617 barcodeNameVector.push_back(group);
1619 //check for repeat barcodes
1620 map<string, int>::iterator itBar = barcodes.find(oligo);
1621 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1623 barcodes[oligo]=indexBarcode; indexBarcode++;
1624 barcodeNameVector.push_back(group);
1626 }else if(type == "LINKER"){
1627 linker.push_back(oligo);
1628 }else if(type == "SPACER"){
1629 spacer.push_back(oligo);
1631 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1633 m->gobble(inOligos);
1637 if (hasPairedBarcodes || hasPrimer) {
1638 pairedOligos = true;
1639 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1642 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1644 //add in potential combos
1645 if(barcodeNameVector.size() == 0){
1647 barcodeNameVector.push_back("");
1650 if(primerNameVector.size() == 0){
1652 primerNameVector.push_back("");
1655 fastaFileNames.resize(barcodeNameVector.size());
1656 for(int i=0;i<fastaFileNames.size();i++){
1657 fastaFileNames[i].assign(primerNameVector.size(), "");
1659 if(qFileName != "") { qualFileNames = fastaFileNames; }
1660 if(nameFile != "") { nameFileNames = fastaFileNames; }
1663 set<string> uniqueNames; //used to cleanup outputFileNames
1665 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1666 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1668 string primerName = primerNameVector[itPrimer->first];
1669 string barcodeName = barcodeNameVector[itBar->first];
1671 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1673 string comboGroupName = "";
1674 string fastaFileName = "";
1675 string qualFileName = "";
1676 string nameFileName = "";
1677 string countFileName = "";
1679 if(primerName == ""){
1680 comboGroupName = barcodeNameVector[itBar->first];
1683 if(barcodeName == ""){
1684 comboGroupName = primerNameVector[itPrimer->first];
1687 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1693 map<string, string> variables;
1694 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1695 variables["[tag]"] = comboGroupName;
1696 fastaFileName = getOutputFileName("fasta", variables);
1697 if (uniqueNames.count(fastaFileName) == 0) {
1698 outputNames.push_back(fastaFileName);
1699 outputTypes["fasta"].push_back(fastaFileName);
1700 uniqueNames.insert(fastaFileName);
1703 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1704 m->openOutputFile(fastaFileName, temp); temp.close();
1706 if(qFileName != ""){
1707 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1708 qualFileName = getOutputFileName("qfile", variables);
1709 if (uniqueNames.count(qualFileName) == 0) {
1710 outputNames.push_back(qualFileName);
1711 outputTypes["qfile"].push_back(qualFileName);
1714 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1715 m->openOutputFile(qualFileName, temp); temp.close();
1719 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1720 nameFileName = getOutputFileName("name", variables);
1721 if (uniqueNames.count(nameFileName) == 0) {
1722 outputNames.push_back(nameFileName);
1723 outputTypes["name"].push_back(nameFileName);
1726 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1727 m->openOutputFile(nameFileName, temp); temp.close();
1733 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1734 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1736 string primerName = primerNameVector[itPrimer->second];
1737 string barcodeName = barcodeNameVector[itBar->second];
1739 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1741 string comboGroupName = "";
1742 string fastaFileName = "";
1743 string qualFileName = "";
1744 string nameFileName = "";
1745 string countFileName = "";
1747 if(primerName == ""){
1748 comboGroupName = barcodeNameVector[itBar->second];
1751 if(barcodeName == ""){
1752 comboGroupName = primerNameVector[itPrimer->second];
1755 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1761 map<string, string> variables;
1762 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1763 variables["[tag]"] = comboGroupName;
1764 fastaFileName = getOutputFileName("fasta", variables);
1765 if (uniqueNames.count(fastaFileName) == 0) {
1766 outputNames.push_back(fastaFileName);
1767 outputTypes["fasta"].push_back(fastaFileName);
1768 uniqueNames.insert(fastaFileName);
1771 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1772 m->openOutputFile(fastaFileName, temp); temp.close();
1774 if(qFileName != ""){
1775 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1776 qualFileName = getOutputFileName("qfile", variables);
1777 if (uniqueNames.count(qualFileName) == 0) {
1778 outputNames.push_back(qualFileName);
1779 outputTypes["qfile"].push_back(qualFileName);
1782 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1783 m->openOutputFile(qualFileName, temp); temp.close();
1787 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1788 nameFileName = getOutputFileName("name", variables);
1789 if (uniqueNames.count(nameFileName) == 0) {
1790 outputNames.push_back(nameFileName);
1791 outputTypes["name"].push_back(nameFileName);
1794 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1795 m->openOutputFile(nameFileName, temp); temp.close();
1802 numFPrimers = primers.size();
1803 if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
1804 numRPrimers = revPrimer.size();
1805 numLinkers = linker.size();
1806 numSpacers = spacer.size();
1808 bool allBlank = true;
1809 for (int i = 0; i < barcodeNameVector.size(); i++) {
1810 if (barcodeNameVector[i] != "") {
1815 for (int i = 0; i < primerNameVector.size(); i++) {
1816 if (primerNameVector[i] != "") {
1823 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1831 catch(exception& e) {
1832 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1836 //***************************************************************************************************************
1838 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1841 if(qscores.getName() != ""){
1842 qscores.trimQScores(-1, keepFirst);
1844 sequence.trim(keepFirst);
1847 catch(exception& e) {
1848 m->errorOut(e, "keepFirstTrim", "countDiffs");
1854 //***************************************************************************************************************
1856 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1860 int length = sequence.getNumBases() - removeLast;
1863 if(qscores.getName() != ""){
1864 qscores.trimQScores(-1, length);
1866 sequence.trim(length);
1875 catch(exception& e) {
1876 m->errorOut(e, "removeLastTrim", "countDiffs");
1882 //***************************************************************************************************************
1884 bool TrimSeqsCommand::cullLength(Sequence& seq){
1887 int length = seq.getNumBases();
1888 bool success = 0; //guilty until proven innocent
1890 if(length >= minLength && maxLength == 0) { success = 1; }
1891 else if(length >= minLength && length <= maxLength) { success = 1; }
1892 else { success = 0; }
1897 catch(exception& e) {
1898 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1904 //***************************************************************************************************************
1906 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1908 int longHomoP = seq.getLongHomoPolymer();
1909 bool success = 0; //guilty until proven innocent
1911 if(longHomoP <= maxHomoP){ success = 1; }
1912 else { success = 0; }
1916 catch(exception& e) {
1917 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1922 //********************************************************************/
1923 string TrimSeqsCommand::reverseOligo(string oligo){
1925 string reverse = "";
1927 for(int i=oligo.length()-1;i>=0;i--){
1929 if(oligo[i] == 'A') { reverse += 'T'; }
1930 else if(oligo[i] == 'T'){ reverse += 'A'; }
1931 else if(oligo[i] == 'U'){ reverse += 'A'; }
1933 else if(oligo[i] == 'G'){ reverse += 'C'; }
1934 else if(oligo[i] == 'C'){ reverse += 'G'; }
1936 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1937 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1939 else if(oligo[i] == 'M'){ reverse += 'K'; }
1940 else if(oligo[i] == 'K'){ reverse += 'M'; }
1942 else if(oligo[i] == 'W'){ reverse += 'W'; }
1943 else if(oligo[i] == 'S'){ reverse += 'S'; }
1945 else if(oligo[i] == 'B'){ reverse += 'V'; }
1946 else if(oligo[i] == 'V'){ reverse += 'B'; }
1948 else if(oligo[i] == 'D'){ reverse += 'H'; }
1949 else if(oligo[i] == 'H'){ reverse += 'D'; }
1951 else { reverse += 'N'; }
1957 catch(exception& e) {
1958 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1963 //***************************************************************************************************************
1965 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1967 int numNs = seq.getAmbigBases();
1968 bool success = 0; //guilty until proven innocent
1970 if(numNs <= maxAmbig) { success = 1; }
1971 else { success = 0; }
1975 catch(exception& e) {
1976 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1981 //***************************************************************************************************************