5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "TrimSeqsCommand", "setParameters");
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65 helpString += "The fasta parameter is required.\n";
66 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67 helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80 helpString += "The qfile parameter allows you to provide a quality file.\n";
81 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92 helpString += "The trim.seqs command should be in the following format: \n";
93 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
94 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
95 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
100 catch(exception& e) {
101 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
110 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
111 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
112 else if (type == "group") { pattern = "[filename],groups"; }
113 else if (type == "name") { pattern = "[filename],[tag],names"; }
114 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
124 //**********************************************************************************************************************
126 TrimSeqsCommand::TrimSeqsCommand(){
128 abort = true; calledHelp = true;
130 vector<string> tempOutNames;
131 outputTypes["fasta"] = tempOutNames;
132 outputTypes["qfile"] = tempOutNames;
133 outputTypes["group"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["count"] = tempOutNames;
137 catch(exception& e) {
138 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
142 //***************************************************************************************************************
144 TrimSeqsCommand::TrimSeqsCommand(string option) {
147 abort = false; calledHelp = false;
150 //allow user to run help
151 if(option == "help") { help(); abort = true; calledHelp = true; }
152 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
155 vector<string> myArray = setParameters();
157 OptionParser parser(option);
158 map<string,string> parameters = parser.getParameters();
160 ValidParameters validParameter;
161 map<string,string>::iterator it;
163 //check to make sure all parameters are valid for command
164 for (it = parameters.begin(); it != parameters.end(); it++) {
165 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
168 //initialize outputTypes
169 vector<string> tempOutNames;
170 outputTypes["fasta"] = tempOutNames;
171 outputTypes["qfile"] = tempOutNames;
172 outputTypes["group"] = tempOutNames;
173 outputTypes["name"] = tempOutNames;
174 outputTypes["count"] = tempOutNames;
176 //if the user changes the input directory command factory will send this info to us in the output parameter
177 string inputDir = validParameter.validFile(parameters, "inputdir", false);
178 if (inputDir == "not found"){ inputDir = ""; }
181 it = parameters.find("fasta");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["fasta"] = inputDir + it->second; }
189 it = parameters.find("oligos");
190 //user has given a template file
191 if(it != parameters.end()){
192 path = m->hasPath(it->second);
193 //if the user has not given a path then, add inputdir. else leave path alone.
194 if (path == "") { parameters["oligos"] = inputDir + it->second; }
197 it = parameters.find("qfile");
198 //user has given a template file
199 if(it != parameters.end()){
200 path = m->hasPath(it->second);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { parameters["qfile"] = inputDir + it->second; }
205 it = parameters.find("name");
206 //user has given a template file
207 if(it != parameters.end()){
208 path = m->hasPath(it->second);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { parameters["name"] = inputDir + it->second; }
213 it = parameters.find("count");
214 //user has given a template file
215 if(it != parameters.end()){
216 path = m->hasPath(it->second);
217 //if the user has not given a path then, add inputdir. else leave path alone.
218 if (path == "") { parameters["count"] = inputDir + it->second; }
224 //check for required parameters
225 fastaFile = validParameter.validFile(parameters, "fasta", true);
226 if (fastaFile == "not found") {
227 fastaFile = m->getFastaFile();
228 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230 }else if (fastaFile == "not open") { abort = true; }
231 else { m->setFastaFile(fastaFile); }
233 //if the user changes the output directory command factory will send this info to us in the output parameter
234 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
236 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
240 //check for optional parameter and set defaults
241 // ...at some point should added some additional type checking...
243 temp = validParameter.validFile(parameters, "flip", false);
244 if (temp == "not found") { flip = 0; }
245 else { flip = m->isTrue(temp); }
247 temp = validParameter.validFile(parameters, "oligos", true);
248 if (temp == "not found"){ oligoFile = ""; }
249 else if(temp == "not open"){ abort = true; }
250 else { oligoFile = temp; m->setOligosFile(oligoFile); }
253 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
254 m->mothurConvert(temp, maxAmbig);
256 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
257 m->mothurConvert(temp, maxHomoP);
259 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
260 m->mothurConvert(temp, minLength);
262 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
263 m->mothurConvert(temp, maxLength);
265 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
266 m->mothurConvert(temp, bdiffs);
268 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
269 m->mothurConvert(temp, pdiffs);
271 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
272 m->mothurConvert(temp, ldiffs);
274 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
275 m->mothurConvert(temp, sdiffs);
277 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
278 m->mothurConvert(temp, tdiffs);
280 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
282 temp = validParameter.validFile(parameters, "qfile", true);
283 if (temp == "not found") { qFileName = ""; }
284 else if(temp == "not open") { abort = true; }
285 else { qFileName = temp; m->setQualFile(qFileName); }
287 temp = validParameter.validFile(parameters, "name", true);
288 if (temp == "not found") { nameFile = ""; }
289 else if(temp == "not open") { nameFile = ""; abort = true; }
290 else { nameFile = temp; m->setNameFile(nameFile); }
292 countfile = validParameter.validFile(parameters, "count", true);
293 if (countfile == "not open") { abort = true; countfile = ""; }
294 else if (countfile == "not found") { countfile = ""; }
295 else { m->setCountTableFile(countfile); }
297 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
299 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
300 m->mothurConvert(temp, qThreshold);
302 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
303 qtrim = m->isTrue(temp);
305 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
306 convert(temp, qRollAverage);
308 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309 convert(temp, qWindowAverage);
311 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
312 convert(temp, qWindowSize);
314 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
315 convert(temp, qWindowStep);
317 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
318 convert(temp, qAverage);
320 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
321 convert(temp, keepFirst);
323 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
324 convert(temp, removeLast);
326 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
327 allFiles = m->isTrue(temp);
329 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
330 keepforward = m->isTrue(temp);
332 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
333 reorient = m->isTrue(temp);
335 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
336 m->setProcessors(temp);
337 m->mothurConvert(temp, processors);
340 if(allFiles && (oligoFile == "")){
341 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
343 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
348 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
349 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
353 if (countfile == "") {
354 if (nameFile == "") {
355 vector<string> files; files.push_back(fastaFile);
356 parser.getNameFile(files);
362 catch(exception& e) {
363 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
367 //***************************************************************************************************************
369 int TrimSeqsCommand::execute(){
372 if (abort == true) { if (calledHelp) { return 0; } return 2; }
374 pairedOligos = false;
375 numFPrimers = 0; //this needs to be initialized
380 vector<vector<string> > fastaFileNames;
381 vector<vector<string> > qualFileNames;
382 vector<vector<string> > nameFileNames;
384 map<string, string> variables;
385 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386 variables["[tag]"] = "trim";
387 string trimSeqFile = getOutputFileName("fasta",variables);
388 string trimQualFile = getOutputFileName("qfile",variables);
389 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
391 variables["[tag]"] = "scrap";
392 string scrapSeqFile = getOutputFileName("fasta",variables);
393 string scrapQualFile = getOutputFileName("qfile",variables);
394 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
396 if (qFileName != "") {
397 outputNames.push_back(trimQualFile);
398 outputNames.push_back(scrapQualFile);
399 outputTypes["qfile"].push_back(trimQualFile);
400 outputTypes["qfile"].push_back(scrapQualFile);
403 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404 variables["[tag]"] = "trim";
405 string trimNameFile = getOutputFileName("name",variables);
406 variables["[tag]"] = "scrap";
407 string scrapNameFile = getOutputFileName("name",variables);
409 if (nameFile != "") {
410 m->readNames(nameFile, nameMap);
411 outputNames.push_back(trimNameFile);
412 outputNames.push_back(scrapNameFile);
413 outputTypes["name"].push_back(trimNameFile);
414 outputTypes["name"].push_back(scrapNameFile);
417 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418 variables["[tag]"] = "trim";
419 string trimCountFile = getOutputFileName("count",variables);
420 variables["[tag]"] = "scrap";
421 string scrapCountFile = getOutputFileName("count",variables);
423 if (countfile != "") {
425 ct.readTable(countfile, true);
426 nameCount = ct.getNameMap();
427 outputNames.push_back(trimCountFile);
428 outputNames.push_back(scrapCountFile);
429 outputTypes["count"].push_back(trimCountFile);
430 outputTypes["count"].push_back(scrapCountFile);
434 if (m->control_pressed) { return 0; }
436 string outputGroupFileName;
438 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439 if ((createGroup) && (countfile == "")){
440 map<string, string> myvariables;
441 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442 outputGroupFileName = getOutputFileName("group",myvariables);
443 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
447 if (!pairedOligos) { if (reorient) { m->mothurOut("[WARNING]: You cannot use reorient without paired barcodes or primers, skipping."); m->mothurOutEndLine(); reorient = false; } }
449 if (m->control_pressed) { return 0; }
451 //fills lines and qlines
452 setLines(fastaFile, qFileName);
455 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
457 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
461 if (m->control_pressed) { return 0; }
464 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
465 map<string, string>::iterator it;
466 set<string> namesToRemove;
467 for(int i=0;i<fastaFileNames.size();i++){
468 for(int j=0;j<fastaFileNames[0].size();j++){
469 if (fastaFileNames[i][j] != "") {
470 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
471 if(m->isBlank(fastaFileNames[i][j])){
472 m->mothurRemove(fastaFileNames[i][j]);
473 namesToRemove.insert(fastaFileNames[i][j]);
476 m->mothurRemove(qualFileNames[i][j]);
477 namesToRemove.insert(qualFileNames[i][j]);
481 m->mothurRemove(nameFileNames[i][j]);
482 namesToRemove.insert(nameFileNames[i][j]);
485 it = uniqueFastaNames.find(fastaFileNames[i][j]);
486 if (it == uniqueFastaNames.end()) {
487 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
495 //remove names for outputFileNames, just cleans up the output
496 vector<string> outputNames2;
497 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498 outputNames = outputNames2;
500 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
502 m->openInputFile(it->first, in);
505 map<string, string> myvariables;
506 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
507 string thisGroupName = "";
508 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
509 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
510 m->openOutputFile(thisGroupName, out);
512 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
515 if (m->control_pressed) { break; }
517 Sequence currSeq(in); m->gobble(in);
518 if (countfile == "") {
519 out << currSeq.getName() << '\t' << it->second << endl;
521 if (nameFile != "") {
522 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
523 if (itName != nameMap.end()) {
524 vector<string> thisSeqsNames;
525 m->splitAtChar(itName->second, thisSeqsNames, ',');
526 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
527 out << thisSeqsNames[k] << '\t' << it->second << endl;
529 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
532 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
533 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
534 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
541 if (countfile != "") { //create countfile with group info included
542 CountTable* ct = new CountTable();
543 ct->readTable(trimCountFile, true);
544 map<string, int> justTrimmedNames = ct->getNameMap();
548 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
549 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
550 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
551 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
552 map<string, string>::iterator it2 = groupMap.find(itNames->first);
553 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
554 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
556 newCt.printTable(trimCountFile);
560 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
562 //output group counts
563 m->mothurOutEndLine();
565 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
566 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
567 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
569 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
571 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
573 //set fasta file as new current fastafile
575 itTypes = outputTypes.find("fasta");
576 if (itTypes != outputTypes.end()) {
577 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
580 itTypes = outputTypes.find("name");
581 if (itTypes != outputTypes.end()) {
582 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
585 itTypes = outputTypes.find("qfile");
586 if (itTypes != outputTypes.end()) {
587 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
590 itTypes = outputTypes.find("group");
591 if (itTypes != outputTypes.end()) {
592 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
595 itTypes = outputTypes.find("count");
596 if (itTypes != outputTypes.end()) {
597 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
600 m->mothurOutEndLine();
601 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
602 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
603 m->mothurOutEndLine();
608 catch(exception& e) {
609 m->errorOut(e, "TrimSeqsCommand", "execute");
614 /**************************************************************************************/
615 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
619 ofstream trimFASTAFile;
620 m->openOutputFile(trimFileName, trimFASTAFile);
622 ofstream scrapFASTAFile;
623 m->openOutputFile(scrapFileName, scrapFASTAFile);
625 ofstream trimQualFile;
626 ofstream scrapQualFile;
628 m->openOutputFile(trimQFileName, trimQualFile);
629 m->openOutputFile(scrapQFileName, scrapQualFile);
632 ofstream trimNameFile;
633 ofstream scrapNameFile;
635 m->openOutputFile(trimNFileName, trimNameFile);
636 m->openOutputFile(scrapNFileName, scrapNameFile);
639 ofstream trimCountFile;
640 ofstream scrapCountFile;
642 m->openOutputFile(trimCFileName, trimCountFile);
643 m->openOutputFile(scrapCFileName, scrapCountFile);
644 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
647 ofstream outGroupsFile;
648 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
650 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
651 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
652 if (fastaFileNames[i][j] != "") {
654 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
656 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
660 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
668 m->openInputFile(filename, inFASTA);
669 inFASTA.seekg(line.start);
672 if(qFileName != "") {
673 m->openInputFile(qFileName, qFile);
674 qFile.seekg(qline.start);
679 int numBarcodes = barcodes.size();
680 TrimOligos* trimOligos = NULL;
681 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
682 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
684 TrimOligos* rtrimOligos = NULL;
686 //create reoriented primer and barcode pairs
687 map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
688 for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
689 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
690 rpairedPrimers[it->first] = tempPair;
691 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
693 for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
694 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
695 rpairedBarcodes[it->first] = tempPair;
696 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
698 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
703 if (m->control_pressed) {
704 delete trimOligos; if (reorient) { delete rtrimOligos; }
705 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
706 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
707 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
708 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
709 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
710 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
714 string trashCode = "";
715 int currentSeqsDiffs = 0;
717 Sequence currSeq(inFASTA); m->gobble(inFASTA);
718 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
719 Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
721 QualityScores currQual; QualityScores savedQual;
723 currQual = QualityScores(qFile); m->gobble(qFile);
724 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
725 //cout << currQual.getName() << endl;
728 string origSeq = currSeq.getUnaligned();
731 int barcodeIndex = 0;
735 success = trimOligos->stripLinker(currSeq, currQual);
736 if(success > ldiffs) { trashCode += 'k'; }
737 else{ currentSeqsDiffs += success; }
741 if(numBarcodes != 0){
742 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
743 if(success > bdiffs) {
746 else{ currentSeqsDiffs += success; }
750 success = trimOligos->stripSpacer(currSeq, currQual);
751 if(success > sdiffs) { trashCode += 's'; }
752 else{ currentSeqsDiffs += success; }
756 if(numFPrimers != 0){
757 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
758 if(success > pdiffs) {
761 else{ currentSeqsDiffs += success; }
764 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
766 if(numRPrimers != 0){
767 success = trimOligos->stripReverse(currSeq, currQual);
768 if(!success) { trashCode += 'r'; }
771 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
773 string thisTrashCode = "";
774 int thisCurrentSeqsDiffs = 0;
776 int thisBarcodeIndex = 0;
777 int thisPrimerIndex = 0;
779 if(numBarcodes != 0){
780 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
781 if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
782 else{ thisCurrentSeqsDiffs += thisSuccess; }
785 if(numFPrimers != 0){
786 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
787 if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
788 else{ thisCurrentSeqsDiffs += thisSuccess; }
791 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
793 if (thisTrashCode == "") {
794 trashCode = thisTrashCode;
795 success = thisSuccess;
796 currentSeqsDiffs = thisCurrentSeqsDiffs;
797 barcodeIndex = thisBarcodeIndex;
798 primerIndex = thisPrimerIndex;
799 savedSeq.reverseComplement();
800 currSeq.setAligned(savedSeq.getAligned());
802 savedQual.flipQScores();
803 currQual.setScores(savedQual.getScores());
805 }else { trashCode += "(" + thisTrashCode + ")"; }
809 success = keepFirstTrim(currSeq, currQual);
813 success = removeLastTrim(currSeq, currQual);
814 if(!success) { trashCode += 'l'; }
819 int origLength = currSeq.getNumBases();
821 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
822 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
823 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
824 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
825 else { success = 1; }
827 //you don't want to trim, if it fails above then scrap it
828 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
830 if(!success) { trashCode += 'q'; }
833 if(minLength > 0 || maxLength > 0){
834 success = cullLength(currSeq);
835 if(!success) { trashCode += 'l'; }
838 success = cullHomoP(currSeq);
839 if(!success) { trashCode += 'h'; }
842 success = cullAmbigs(currSeq);
843 if(!success) { trashCode += 'n'; }
846 if(flip){ // should go last
847 currSeq.reverseComplement();
849 currQual.flipQScores();
853 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
855 if(trashCode.length() == 0){
856 string thisGroup = "";
858 if(numBarcodes != 0){
859 thisGroup = barcodeNameVector[barcodeIndex];
860 if (numFPrimers != 0) {
861 if (primerNameVector[primerIndex] != "") {
862 if(thisGroup != "") {
863 thisGroup += "." + primerNameVector[primerIndex];
865 thisGroup = primerNameVector[primerIndex];
872 int pos = thisGroup.find("ignore");
873 if (pos == string::npos) {
874 currSeq.setAligned(currSeq.getUnaligned());
875 currSeq.printSequence(trimFASTAFile);
878 currQual.printQScores(trimQualFile);
883 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
884 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
885 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
888 int numRedundants = 0;
889 if (countfile != "") {
890 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
891 if (itCount != nameCount.end()) {
892 trimCountFile << itCount->first << '\t' << itCount->second << endl;
893 numRedundants = itCount->second-1;
894 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
898 if(numBarcodes != 0){
900 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
902 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
903 else { groupMap[currSeq.getName()] = thisGroup; }
905 if (nameFile != "") {
906 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
907 if (itName != nameMap.end()) {
908 vector<string> thisSeqsNames;
909 m->splitAtChar(itName->second, thisSeqsNames, ',');
910 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
911 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
912 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
914 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
917 map<string, int>::iterator it = groupCounts.find(thisGroup);
918 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
919 else { groupCounts[it->first] += (1 + numRedundants); }
926 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
927 currSeq.printSequence(output);
931 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
932 currQual.printQScores(output);
937 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
938 if (itName != nameMap.end()) {
939 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
940 output << itName->first << '\t' << itName->second << endl;
942 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
948 if(nameFile != ""){ //needs to be before the currSeq name is changed
949 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
950 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
951 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
953 if (countfile != "") {
954 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
955 if (itCount != nameCount.end()) {
956 trimCountFile << itCount->first << '\t' << itCount->second << endl;
957 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
960 currSeq.setName(currSeq.getName() + '|' + trashCode);
961 currSeq.setUnaligned(origSeq);
962 currSeq.setAligned(origSeq);
963 currSeq.printSequence(scrapFASTAFile);
965 currQual.printQScores(scrapQualFile);
971 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
972 unsigned long long pos = inFASTA.tellg();
973 if ((pos == -1) || (pos >= line.end)) { break; }
976 if (inFASTA.eof()) { break; }
980 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
984 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
987 if (reorient) { delete rtrimOligos; }
989 trimFASTAFile.close();
990 scrapFASTAFile.close();
991 if (createGroup) { outGroupsFile.close(); }
992 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
993 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
994 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
998 catch(exception& e) {
999 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
1004 /**************************************************************************************************/
1006 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
1010 int exitCommand = 1;
1013 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1014 //loop through and create all the processes you want
1015 while (process != processors) {
1019 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1021 }else if (pid == 0){
1023 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1024 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1025 vector<vector<string> > tempNameFileNames = nameFileNames;
1030 for(int i=0;i<tempFASTAFileNames.size();i++){
1031 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1032 if (tempFASTAFileNames[i][j] != "") {
1033 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1034 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1036 if(qFileName != ""){
1037 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1038 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1041 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1042 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1049 driverCreateTrim(filename,
1051 (trimFASTAFileName + toString(getpid()) + ".temp"),
1052 (scrapFASTAFileName + toString(getpid()) + ".temp"),
1053 (trimQualFileName + toString(getpid()) + ".temp"),
1054 (scrapQualFileName + toString(getpid()) + ".temp"),
1055 (trimNameFileName + toString(getpid()) + ".temp"),
1056 (scrapNameFileName + toString(getpid()) + ".temp"),
1057 (trimCountFileName + toString(getpid()) + ".temp"),
1058 (scrapCountFileName + toString(getpid()) + ".temp"),
1059 (groupFile + toString(getpid()) + ".temp"),
1061 tempPrimerQualFileNames,
1066 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1068 //pass groupCounts to parent
1071 string tempFile = filename + toString(getpid()) + ".num.temp";
1072 m->openOutputFile(tempFile, out);
1074 out << groupCounts.size() << endl;
1076 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1077 out << it->first << '\t' << it->second << endl;
1080 out << groupMap.size() << endl;
1081 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1082 out << it->first << '\t' << it->second << endl;
1088 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1089 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1096 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1097 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1098 if(qFileName != ""){
1099 m->openOutputFile(trimQualFileName, temp); temp.close();
1100 m->openOutputFile(scrapQualFileName, temp); temp.close();
1102 if (nameFile != "") {
1103 m->openOutputFile(trimNameFileName, temp); temp.close();
1104 m->openOutputFile(scrapNameFileName, temp); temp.close();
1106 if (countfile != "") {
1107 m->openOutputFile(trimCountFileName, temp); temp.close();
1108 m->openOutputFile(scrapCountFileName, temp); temp.close();
1111 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1113 //force parent to wait until all the processes are done
1114 for (int i=0;i<processIDS.size();i++) {
1115 int temp = processIDS[i];
1119 //////////////////////////////////////////////////////////////////////////////////////////////////////
1120 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1121 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1122 //////////////////////////////////////////////////////////////////////////////////////////////////////
1124 vector<trimData*> pDataArray;
1125 DWORD dwThreadIdArray[processors-1];
1126 HANDLE hThreadArray[processors-1];
1128 //Create processor worker threads.
1129 for( int h=0; h<processors-1; h++){
1131 string extension = "";
1132 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1133 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1134 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1135 vector<vector<string> > tempNameFileNames = nameFileNames;
1140 for(int i=0;i<tempFASTAFileNames.size();i++){
1141 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1142 if (tempFASTAFileNames[i][j] != "") {
1143 tempFASTAFileNames[i][j] += extension;
1144 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1146 if(qFileName != ""){
1147 tempPrimerQualFileNames[i][j] += extension;
1148 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1151 tempNameFileNames[i][j] += extension;
1152 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1160 trimData* tempTrim = new trimData(filename,
1161 qFileName, nameFile, countfile,
1162 (trimFASTAFileName+extension),
1163 (scrapFASTAFileName+extension),
1164 (trimQualFileName+extension),
1165 (scrapQualFileName+extension),
1166 (trimNameFileName+extension),
1167 (scrapNameFileName+extension),
1168 (trimCountFileName+extension),
1169 (scrapCountFileName+extension),
1170 (groupFile+extension),
1172 tempPrimerQualFileNames,
1174 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1175 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1176 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1177 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1178 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1179 pDataArray.push_back(tempTrim);
1181 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1186 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1187 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1188 if(qFileName != ""){
1189 m->openOutputFile(trimQualFileName, temp); temp.close();
1190 m->openOutputFile(scrapQualFileName, temp); temp.close();
1192 if (nameFile != "") {
1193 m->openOutputFile(trimNameFileName, temp); temp.close();
1194 m->openOutputFile(scrapNameFileName, temp); temp.close();
1196 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1197 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1198 vector<vector<string> > tempNameFileNames = nameFileNames;
1201 string extension = toString(processors-1) + ".temp";
1202 for(int i=0;i<tempFASTAFileNames.size();i++){
1203 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1204 if (tempFASTAFileNames[i][j] != "") {
1205 tempFASTAFileNames[i][j] += extension;
1206 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1208 if(qFileName != ""){
1209 tempPrimerQualFileNames[i][j] += extension;
1210 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1213 tempNameFileNames[i][j] += extension;
1214 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1221 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1222 processIDS.push_back(processors-1);
1225 //Wait until all threads have terminated.
1226 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1228 //Close all thread handles and free memory allocations.
1229 for(int i=0; i < pDataArray.size(); i++){
1230 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1231 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1233 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1234 map<string, int>::iterator it2 = groupCounts.find(it->first);
1235 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1236 else { groupCounts[it->first] += it->second; }
1238 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1239 map<string, string>::iterator it2 = groupMap.find(it->first);
1240 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1241 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1243 CloseHandle(hThreadArray[i]);
1244 delete pDataArray[i];
1251 for(int i=0;i<processIDS.size();i++){
1253 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1255 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1256 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1257 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1258 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1260 if(qFileName != ""){
1261 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1262 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1263 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1264 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1268 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1269 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1270 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1271 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1274 if(countfile != ""){
1275 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1276 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1277 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1278 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1281 if((createGroup)&&(countfile == "")){
1282 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1283 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1288 for(int j=0;j<fastaFileNames.size();j++){
1289 for(int k=0;k<fastaFileNames[j].size();k++){
1290 if (fastaFileNames[j][k] != "") {
1291 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1292 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1294 if(qFileName != ""){
1295 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1296 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1300 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1301 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1308 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1311 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1312 m->openInputFile(tempFile, in);
1316 in >> tempNum; m->gobble(in);
1319 for (int i = 0; i < tempNum; i++) {
1321 in >> group >> groupNum; m->gobble(in);
1323 map<string, int>::iterator it = groupCounts.find(group);
1324 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1325 else { groupCounts[it->first] += groupNum; }
1328 in >> tempNum; m->gobble(in);
1330 for (int i = 0; i < tempNum; i++) {
1331 string group, seqName;
1332 in >> seqName >> group; m->gobble(in);
1334 map<string, string>::iterator it = groupMap.find(seqName);
1335 if (it == groupMap.end()) { groupMap[seqName] = group; }
1336 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1340 in.close(); m->mothurRemove(tempFile);
1347 catch(exception& e) {
1348 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1353 /**************************************************************************************************/
1355 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1358 vector<unsigned long long> fastaFilePos;
1359 vector<unsigned long long> qfileFilePos;
1361 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1362 //set file positions for fasta file
1363 fastaFilePos = m->divideFile(filename, processors);
1365 //get name of first sequence in each chunk
1366 map<string, int> firstSeqNames;
1367 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1369 m->openInputFile(filename, in);
1370 in.seekg(fastaFilePos[i]);
1373 firstSeqNames[temp.getName()] = i;
1378 if(qfilename != "") {
1379 //seach for filePos of each first name in the qfile and save in qfileFilePos
1381 m->openInputFile(qfilename, inQual);
1384 while(!inQual.eof()){
1385 input = m->getline(inQual);
1387 if (input.length() != 0) {
1388 if(input[0] == '>'){ //this is a sequence name line
1389 istringstream nameStream(input);
1391 string sname = ""; nameStream >> sname;
1392 sname = sname.substr(1);
1394 m->checkName(sname);
1396 map<string, int>::iterator it = firstSeqNames.find(sname);
1398 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1399 unsigned long long pos = inQual.tellg();
1400 qfileFilePos.push_back(pos - input.length() - 1);
1401 firstSeqNames.erase(it);
1406 if (firstSeqNames.size() == 0) { break; }
1411 if (firstSeqNames.size() != 0) {
1412 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1413 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1419 //get last file position of qfile
1421 unsigned long long size;
1423 //get num bytes in file
1424 pFile = fopen (qfilename.c_str(),"rb");
1425 if (pFile==NULL) perror ("Error opening file");
1427 fseek (pFile, 0, SEEK_END);
1432 qfileFilePos.push_back(size);
1435 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1436 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1437 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1438 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1440 if(qfilename == "") { qLines = lines; } //files with duds
1446 if (processors == 1) { //save time
1447 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1448 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1449 lines.push_back(linePair(0, 1000));
1450 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1452 int numFastaSeqs = 0;
1453 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1454 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1456 if (qfilename != "") {
1457 int numQualSeqs = 0;
1458 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1460 if (numFastaSeqs != numQualSeqs) {
1461 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1465 //figure out how many sequences you have to process
1466 int numSeqsPerProcessor = numFastaSeqs / processors;
1467 for (int i = 0; i < processors; i++) {
1468 int startIndex = i * numSeqsPerProcessor;
1469 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1470 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1471 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1474 if(qfilename == "") { qLines = lines; } //files with duds
1479 catch(exception& e) {
1480 m->errorOut(e, "TrimSeqsCommand", "setLines");
1485 //***************************************************************************************************************
1487 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1490 m->openInputFile(oligoFile, inOligos);
1494 string type, oligo, roligo, group;
1495 bool hasPrimer = false; bool hasPairedBarcodes = false;
1497 int indexPrimer = 0;
1498 int indexBarcode = 0;
1499 int indexPairedPrimer = 0;
1500 int indexPairedBarcode = 0;
1501 set<string> uniquePrimers;
1502 set<string> uniqueBarcodes;
1504 while(!inOligos.eof()){
1508 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1511 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1512 m->gobble(inOligos);
1515 m->gobble(inOligos);
1516 //make type case insensitive
1517 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1521 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1523 for(int i=0;i<oligo.length();i++){
1524 oligo[i] = toupper(oligo[i]);
1525 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1528 if(type == "FORWARD"){
1531 // get rest of line in case there is a primer name
1532 while (!inOligos.eof()) {
1533 char c = inOligos.get();
1534 if (c == 10 || c == 13 || c == -1){ break; }
1535 else if (c == 32 || c == 9){;} //space or tab
1536 else { group += c; }
1539 //check for repeat barcodes
1540 map<string, int>::iterator itPrime = primers.find(oligo);
1541 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1543 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1545 primers[oligo]=indexPrimer; indexPrimer++;
1546 primerNameVector.push_back(group);
1548 else if (type == "PRIMER"){
1549 m->gobble(inOligos);
1553 for(int i=0;i<roligo.length();i++){
1554 roligo[i] = toupper(roligo[i]);
1555 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1557 roligo = reverseOligo(roligo);
1561 // get rest of line in case there is a primer name
1562 while (!inOligos.eof()) {
1563 char c = inOligos.get();
1564 if (c == 10 || c == 13 || c == -1){ break; }
1565 else if (c == 32 || c == 9){;} //space or tab
1566 else { group += c; }
1569 oligosPair newPrimer(oligo, roligo);
1571 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1573 //check for repeat barcodes
1574 string tempPair = oligo+roligo;
1575 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1576 else { uniquePrimers.insert(tempPair); }
1578 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1580 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1581 primerNameVector.push_back(group);
1584 else if(type == "REVERSE"){
1585 //Sequence oligoRC("reverse", oligo);
1586 //oligoRC.reverseComplement();
1587 string oligoRC = reverseOligo(oligo);
1588 revPrimer.push_back(oligoRC);
1590 else if(type == "BARCODE"){
1593 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1594 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1597 while (!inOligos.eof()) {
1598 char c = inOligos.get();
1599 if (c == 10 || c == 13 || c == -1){ break; }
1600 else if (c == 32 || c == 9){;} //space or tab
1604 //then this is illumina data with 4 columns
1606 hasPairedBarcodes = true;
1607 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1610 for(int i=0;i<reverseBarcode.length();i++){
1611 reverseBarcode[i] = toupper(reverseBarcode[i]);
1612 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1615 reverseBarcode = reverseOligo(reverseBarcode);
1616 oligosPair newPair(oligo, reverseBarcode);
1618 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1620 //check for repeat barcodes
1621 string tempPair = oligo+reverseBarcode;
1622 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1623 else { uniqueBarcodes.insert(tempPair); }
1625 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1626 barcodeNameVector.push_back(group);
1628 //check for repeat barcodes
1629 map<string, int>::iterator itBar = barcodes.find(oligo);
1630 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1632 barcodes[oligo]=indexBarcode; indexBarcode++;
1633 barcodeNameVector.push_back(group);
1635 }else if(type == "LINKER"){
1636 linker.push_back(oligo);
1637 }else if(type == "SPACER"){
1638 spacer.push_back(oligo);
1640 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1642 m->gobble(inOligos);
1646 if (hasPairedBarcodes || hasPrimer) {
1647 pairedOligos = true;
1648 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1649 }else if (reorient) { m->mothurOut("[Warning]: cannot use checkorient without paired barcodes or primers, ignoring.\n"); m->mothurOutEndLine(); reorient = false; }
1651 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1653 //add in potential combos
1654 if(barcodeNameVector.size() == 0){
1656 barcodeNameVector.push_back("");
1659 if(primerNameVector.size() == 0){
1661 primerNameVector.push_back("");
1664 fastaFileNames.resize(barcodeNameVector.size());
1665 for(int i=0;i<fastaFileNames.size();i++){
1666 fastaFileNames[i].assign(primerNameVector.size(), "");
1668 if(qFileName != "") { qualFileNames = fastaFileNames; }
1669 if(nameFile != "") { nameFileNames = fastaFileNames; }
1672 set<string> uniqueNames; //used to cleanup outputFileNames
1674 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1675 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1677 string primerName = primerNameVector[itPrimer->first];
1678 string barcodeName = barcodeNameVector[itBar->first];
1680 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1682 string comboGroupName = "";
1683 string fastaFileName = "";
1684 string qualFileName = "";
1685 string nameFileName = "";
1686 string countFileName = "";
1688 if(primerName == ""){
1689 comboGroupName = barcodeNameVector[itBar->first];
1692 if(barcodeName == ""){
1693 comboGroupName = primerNameVector[itPrimer->first];
1696 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1702 map<string, string> variables;
1703 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1704 variables["[tag]"] = comboGroupName;
1705 fastaFileName = getOutputFileName("fasta", variables);
1706 if (uniqueNames.count(fastaFileName) == 0) {
1707 outputNames.push_back(fastaFileName);
1708 outputTypes["fasta"].push_back(fastaFileName);
1709 uniqueNames.insert(fastaFileName);
1712 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1713 m->openOutputFile(fastaFileName, temp); temp.close();
1715 if(qFileName != ""){
1716 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1717 qualFileName = getOutputFileName("qfile", variables);
1718 if (uniqueNames.count(qualFileName) == 0) {
1719 outputNames.push_back(qualFileName);
1720 outputTypes["qfile"].push_back(qualFileName);
1723 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1724 m->openOutputFile(qualFileName, temp); temp.close();
1728 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1729 nameFileName = getOutputFileName("name", variables);
1730 if (uniqueNames.count(nameFileName) == 0) {
1731 outputNames.push_back(nameFileName);
1732 outputTypes["name"].push_back(nameFileName);
1735 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1736 m->openOutputFile(nameFileName, temp); temp.close();
1742 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1743 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1745 string primerName = primerNameVector[itPrimer->second];
1746 string barcodeName = barcodeNameVector[itBar->second];
1748 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1750 string comboGroupName = "";
1751 string fastaFileName = "";
1752 string qualFileName = "";
1753 string nameFileName = "";
1754 string countFileName = "";
1756 if(primerName == ""){
1757 comboGroupName = barcodeNameVector[itBar->second];
1760 if(barcodeName == ""){
1761 comboGroupName = primerNameVector[itPrimer->second];
1764 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1770 map<string, string> variables;
1771 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1772 variables["[tag]"] = comboGroupName;
1773 fastaFileName = getOutputFileName("fasta", variables);
1774 if (uniqueNames.count(fastaFileName) == 0) {
1775 outputNames.push_back(fastaFileName);
1776 outputTypes["fasta"].push_back(fastaFileName);
1777 uniqueNames.insert(fastaFileName);
1780 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1781 m->openOutputFile(fastaFileName, temp); temp.close();
1783 if(qFileName != ""){
1784 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1785 qualFileName = getOutputFileName("qfile", variables);
1786 if (uniqueNames.count(qualFileName) == 0) {
1787 outputNames.push_back(qualFileName);
1788 outputTypes["qfile"].push_back(qualFileName);
1791 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1792 m->openOutputFile(qualFileName, temp); temp.close();
1796 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1797 nameFileName = getOutputFileName("name", variables);
1798 if (uniqueNames.count(nameFileName) == 0) {
1799 outputNames.push_back(nameFileName);
1800 outputTypes["name"].push_back(nameFileName);
1803 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1804 m->openOutputFile(nameFileName, temp); temp.close();
1811 numFPrimers = primers.size();
1812 if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
1813 numRPrimers = revPrimer.size();
1814 numLinkers = linker.size();
1815 numSpacers = spacer.size();
1817 bool allBlank = true;
1818 for (int i = 0; i < barcodeNameVector.size(); i++) {
1819 if (barcodeNameVector[i] != "") {
1824 for (int i = 0; i < primerNameVector.size(); i++) {
1825 if (primerNameVector[i] != "") {
1832 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1840 catch(exception& e) {
1841 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1845 //***************************************************************************************************************
1847 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1850 if(qscores.getName() != ""){
1851 qscores.trimQScores(-1, keepFirst);
1854 // sequence.printSequence(cout);cout << endl;
1856 sequence.trim(keepFirst);
1858 // sequence.printSequence(cout);cout << endl << endl;;
1862 catch(exception& e) {
1863 m->errorOut(e, "keepFirstTrim", "countDiffs");
1869 //***************************************************************************************************************
1871 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1875 int length = sequence.getNumBases() - removeLast;
1878 if(qscores.getName() != ""){
1879 qscores.trimQScores(-1, length);
1881 sequence.trim(length);
1890 catch(exception& e) {
1891 m->errorOut(e, "removeLastTrim", "countDiffs");
1897 //***************************************************************************************************************
1899 bool TrimSeqsCommand::cullLength(Sequence& seq){
1902 int length = seq.getNumBases();
1903 bool success = 0; //guilty until proven innocent
1905 if(length >= minLength && maxLength == 0) { success = 1; }
1906 else if(length >= minLength && length <= maxLength) { success = 1; }
1907 else { success = 0; }
1912 catch(exception& e) {
1913 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1919 //***************************************************************************************************************
1921 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1923 int longHomoP = seq.getLongHomoPolymer();
1924 bool success = 0; //guilty until proven innocent
1926 if(longHomoP <= maxHomoP){ success = 1; }
1927 else { success = 0; }
1931 catch(exception& e) {
1932 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1937 //********************************************************************/
1938 string TrimSeqsCommand::reverseOligo(string oligo){
1940 string reverse = "";
1942 for(int i=oligo.length()-1;i>=0;i--){
1944 if(oligo[i] == 'A') { reverse += 'T'; }
1945 else if(oligo[i] == 'T'){ reverse += 'A'; }
1946 else if(oligo[i] == 'U'){ reverse += 'A'; }
1948 else if(oligo[i] == 'G'){ reverse += 'C'; }
1949 else if(oligo[i] == 'C'){ reverse += 'G'; }
1951 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1952 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1954 else if(oligo[i] == 'M'){ reverse += 'K'; }
1955 else if(oligo[i] == 'K'){ reverse += 'M'; }
1957 else if(oligo[i] == 'W'){ reverse += 'W'; }
1958 else if(oligo[i] == 'S'){ reverse += 'S'; }
1960 else if(oligo[i] == 'B'){ reverse += 'V'; }
1961 else if(oligo[i] == 'V'){ reverse += 'B'; }
1963 else if(oligo[i] == 'D'){ reverse += 'H'; }
1964 else if(oligo[i] == 'H'){ reverse += 'D'; }
1966 else { reverse += 'N'; }
1972 catch(exception& e) {
1973 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1978 //***************************************************************************************************************
1980 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1982 int numNs = seq.getAmbigBases();
1983 bool success = 0; //guilty until proven innocent
1985 if(numNs <= maxAmbig) { success = 1; }
1986 else { success = 0; }
1990 catch(exception& e) {
1991 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1996 //***************************************************************************************************************