5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "TrimSeqsCommand", "setParameters");
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65 helpString += "The fasta parameter is required.\n";
66 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67 helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80 helpString += "The qfile parameter allows you to provide a quality file.\n";
81 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92 helpString += "The trim.seqs command should be in the following format: \n";
93 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
94 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
95 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
100 catch(exception& e) {
101 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
110 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
111 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
112 else if (type == "group") { pattern = "[filename],groups"; }
113 else if (type == "name") { pattern = "[filename],[tag],names"; }
114 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
124 //**********************************************************************************************************************
126 TrimSeqsCommand::TrimSeqsCommand(){
128 abort = true; calledHelp = true;
130 vector<string> tempOutNames;
131 outputTypes["fasta"] = tempOutNames;
132 outputTypes["qfile"] = tempOutNames;
133 outputTypes["group"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["count"] = tempOutNames;
137 catch(exception& e) {
138 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
142 //***************************************************************************************************************
144 TrimSeqsCommand::TrimSeqsCommand(string option) {
147 abort = false; calledHelp = false;
150 //allow user to run help
151 if(option == "help") { help(); abort = true; calledHelp = true; }
152 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
155 vector<string> myArray = setParameters();
157 OptionParser parser(option);
158 map<string,string> parameters = parser.getParameters();
160 ValidParameters validParameter;
161 map<string,string>::iterator it;
163 //check to make sure all parameters are valid for command
164 for (it = parameters.begin(); it != parameters.end(); it++) {
165 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
168 //initialize outputTypes
169 vector<string> tempOutNames;
170 outputTypes["fasta"] = tempOutNames;
171 outputTypes["qfile"] = tempOutNames;
172 outputTypes["group"] = tempOutNames;
173 outputTypes["name"] = tempOutNames;
174 outputTypes["count"] = tempOutNames;
176 //if the user changes the input directory command factory will send this info to us in the output parameter
177 string inputDir = validParameter.validFile(parameters, "inputdir", false);
178 if (inputDir == "not found"){ inputDir = ""; }
181 it = parameters.find("fasta");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["fasta"] = inputDir + it->second; }
189 it = parameters.find("oligos");
190 //user has given a template file
191 if(it != parameters.end()){
192 path = m->hasPath(it->second);
193 //if the user has not given a path then, add inputdir. else leave path alone.
194 if (path == "") { parameters["oligos"] = inputDir + it->second; }
197 it = parameters.find("qfile");
198 //user has given a template file
199 if(it != parameters.end()){
200 path = m->hasPath(it->second);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { parameters["qfile"] = inputDir + it->second; }
205 it = parameters.find("name");
206 //user has given a template file
207 if(it != parameters.end()){
208 path = m->hasPath(it->second);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { parameters["name"] = inputDir + it->second; }
213 it = parameters.find("count");
214 //user has given a template file
215 if(it != parameters.end()){
216 path = m->hasPath(it->second);
217 //if the user has not given a path then, add inputdir. else leave path alone.
218 if (path == "") { parameters["count"] = inputDir + it->second; }
224 //check for required parameters
225 fastaFile = validParameter.validFile(parameters, "fasta", true);
226 if (fastaFile == "not found") {
227 fastaFile = m->getFastaFile();
228 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230 }else if (fastaFile == "not open") { abort = true; }
231 else { m->setFastaFile(fastaFile); }
233 //if the user changes the output directory command factory will send this info to us in the output parameter
234 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
236 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
240 //check for optional parameter and set defaults
241 // ...at some point should added some additional type checking...
243 temp = validParameter.validFile(parameters, "flip", false);
244 if (temp == "not found") { flip = 0; }
245 else { flip = m->isTrue(temp); }
247 temp = validParameter.validFile(parameters, "oligos", true);
248 if (temp == "not found"){ oligoFile = ""; }
249 else if(temp == "not open"){ abort = true; }
250 else { oligoFile = temp; m->setOligosFile(oligoFile); }
253 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
254 m->mothurConvert(temp, maxAmbig);
256 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
257 m->mothurConvert(temp, maxHomoP);
259 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
260 m->mothurConvert(temp, minLength);
262 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
263 m->mothurConvert(temp, maxLength);
265 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
266 m->mothurConvert(temp, bdiffs);
268 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
269 m->mothurConvert(temp, pdiffs);
271 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
272 m->mothurConvert(temp, ldiffs);
274 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
275 m->mothurConvert(temp, sdiffs);
277 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
278 m->mothurConvert(temp, tdiffs);
280 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
282 temp = validParameter.validFile(parameters, "qfile", true);
283 if (temp == "not found") { qFileName = ""; }
284 else if(temp == "not open") { abort = true; }
285 else { qFileName = temp; m->setQualFile(qFileName); }
287 temp = validParameter.validFile(parameters, "name", true);
288 if (temp == "not found") { nameFile = ""; }
289 else if(temp == "not open") { nameFile = ""; abort = true; }
290 else { nameFile = temp; m->setNameFile(nameFile); }
292 countfile = validParameter.validFile(parameters, "count", true);
293 if (countfile == "not open") { abort = true; countfile = ""; }
294 else if (countfile == "not found") { countfile = ""; }
295 else { m->setCountTableFile(countfile); }
297 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
299 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
300 m->mothurConvert(temp, qThreshold);
302 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
303 qtrim = m->isTrue(temp);
305 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
306 convert(temp, qRollAverage);
308 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309 convert(temp, qWindowAverage);
311 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
312 convert(temp, qWindowSize);
314 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
315 convert(temp, qWindowStep);
317 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
318 convert(temp, qAverage);
320 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
321 convert(temp, keepFirst);
323 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
324 convert(temp, removeLast);
326 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
327 allFiles = m->isTrue(temp);
329 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
330 keepforward = m->isTrue(temp);
332 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
333 reorient = m->isTrue(temp);
335 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
336 m->setProcessors(temp);
337 m->mothurConvert(temp, processors);
340 if(allFiles && (oligoFile == "")){
341 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
343 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
348 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
349 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
353 if (countfile == "") {
354 if (nameFile == "") {
355 vector<string> files; files.push_back(fastaFile);
356 parser.getNameFile(files);
362 catch(exception& e) {
363 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
367 //***************************************************************************************************************
369 int TrimSeqsCommand::execute(){
372 if (abort == true) { if (calledHelp) { return 0; } return 2; }
374 pairedOligos = false;
375 numFPrimers = 0; //this needs to be initialized
380 vector<vector<string> > fastaFileNames;
381 vector<vector<string> > qualFileNames;
382 vector<vector<string> > nameFileNames;
384 map<string, string> variables;
385 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386 variables["[tag]"] = "trim";
387 string trimSeqFile = getOutputFileName("fasta",variables);
388 string trimQualFile = getOutputFileName("qfile",variables);
389 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
391 variables["[tag]"] = "scrap";
392 string scrapSeqFile = getOutputFileName("fasta",variables);
393 string scrapQualFile = getOutputFileName("qfile",variables);
394 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
396 if (qFileName != "") {
397 outputNames.push_back(trimQualFile);
398 outputNames.push_back(scrapQualFile);
399 outputTypes["qfile"].push_back(trimQualFile);
400 outputTypes["qfile"].push_back(scrapQualFile);
403 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404 variables["[tag]"] = "trim";
405 string trimNameFile = getOutputFileName("name",variables);
406 variables["[tag]"] = "scrap";
407 string scrapNameFile = getOutputFileName("name",variables);
409 if (nameFile != "") {
410 m->readNames(nameFile, nameMap);
411 outputNames.push_back(trimNameFile);
412 outputNames.push_back(scrapNameFile);
413 outputTypes["name"].push_back(trimNameFile);
414 outputTypes["name"].push_back(scrapNameFile);
417 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418 variables["[tag]"] = "trim";
419 string trimCountFile = getOutputFileName("count",variables);
420 variables["[tag]"] = "scrap";
421 string scrapCountFile = getOutputFileName("count",variables);
423 if (countfile != "") {
425 ct.readTable(countfile);
426 nameCount = ct.getNameMap();
427 outputNames.push_back(trimCountFile);
428 outputNames.push_back(scrapCountFile);
429 outputTypes["count"].push_back(trimCountFile);
430 outputTypes["count"].push_back(scrapCountFile);
434 if (m->control_pressed) { return 0; }
436 string outputGroupFileName;
438 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439 if ((createGroup) && (countfile == "")){
440 map<string, string> myvariables;
441 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442 outputGroupFileName = getOutputFileName("group",myvariables);
443 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
447 if (m->control_pressed) { return 0; }
449 //fills lines and qlines
450 setLines(fastaFile, qFileName);
453 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
455 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
459 if (m->control_pressed) { return 0; }
462 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
463 map<string, string>::iterator it;
464 set<string> namesToRemove;
465 for(int i=0;i<fastaFileNames.size();i++){
466 for(int j=0;j<fastaFileNames[0].size();j++){
467 if (fastaFileNames[i][j] != "") {
468 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
469 if(m->isBlank(fastaFileNames[i][j])){
470 m->mothurRemove(fastaFileNames[i][j]);
471 namesToRemove.insert(fastaFileNames[i][j]);
474 m->mothurRemove(qualFileNames[i][j]);
475 namesToRemove.insert(qualFileNames[i][j]);
479 m->mothurRemove(nameFileNames[i][j]);
480 namesToRemove.insert(nameFileNames[i][j]);
483 it = uniqueFastaNames.find(fastaFileNames[i][j]);
484 if (it == uniqueFastaNames.end()) {
485 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
493 //remove names for outputFileNames, just cleans up the output
494 vector<string> outputNames2;
495 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
496 outputNames = outputNames2;
498 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
500 m->openInputFile(it->first, in);
503 map<string, string> myvariables;
504 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
505 string thisGroupName = "";
506 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
507 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
508 m->openOutputFile(thisGroupName, out);
510 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
513 if (m->control_pressed) { break; }
515 Sequence currSeq(in); m->gobble(in);
516 if (countfile == "") {
517 out << currSeq.getName() << '\t' << it->second << endl;
519 if (nameFile != "") {
520 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
521 if (itName != nameMap.end()) {
522 vector<string> thisSeqsNames;
523 m->splitAtChar(itName->second, thisSeqsNames, ',');
524 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
525 out << thisSeqsNames[k] << '\t' << it->second << endl;
527 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
530 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
531 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
532 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
539 if (countfile != "") { //create countfile with group info included
540 CountTable* ct = new CountTable();
541 ct->readTable(trimCountFile);
542 map<string, int> justTrimmedNames = ct->getNameMap();
546 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
547 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
548 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
549 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
550 map<string, string>::iterator it2 = groupMap.find(itNames->first);
551 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
552 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
554 newCt.printTable(trimCountFile);
558 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
560 //output group counts
561 m->mothurOutEndLine();
563 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
564 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
565 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
567 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
569 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
571 //set fasta file as new current fastafile
573 itTypes = outputTypes.find("fasta");
574 if (itTypes != outputTypes.end()) {
575 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
578 itTypes = outputTypes.find("name");
579 if (itTypes != outputTypes.end()) {
580 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
583 itTypes = outputTypes.find("qfile");
584 if (itTypes != outputTypes.end()) {
585 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
588 itTypes = outputTypes.find("group");
589 if (itTypes != outputTypes.end()) {
590 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
593 itTypes = outputTypes.find("count");
594 if (itTypes != outputTypes.end()) {
595 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
598 m->mothurOutEndLine();
599 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
600 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
601 m->mothurOutEndLine();
606 catch(exception& e) {
607 m->errorOut(e, "TrimSeqsCommand", "execute");
612 /**************************************************************************************/
613 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
617 ofstream trimFASTAFile;
618 m->openOutputFile(trimFileName, trimFASTAFile);
620 ofstream scrapFASTAFile;
621 m->openOutputFile(scrapFileName, scrapFASTAFile);
623 ofstream trimQualFile;
624 ofstream scrapQualFile;
626 m->openOutputFile(trimQFileName, trimQualFile);
627 m->openOutputFile(scrapQFileName, scrapQualFile);
630 ofstream trimNameFile;
631 ofstream scrapNameFile;
633 m->openOutputFile(trimNFileName, trimNameFile);
634 m->openOutputFile(scrapNFileName, scrapNameFile);
637 ofstream trimCountFile;
638 ofstream scrapCountFile;
640 m->openOutputFile(trimCFileName, trimCountFile);
641 m->openOutputFile(scrapCFileName, scrapCountFile);
642 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
645 ofstream outGroupsFile;
646 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
648 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
649 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
650 if (fastaFileNames[i][j] != "") {
652 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
654 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
658 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
666 m->openInputFile(filename, inFASTA);
667 inFASTA.seekg(line.start);
670 if(qFileName != "") {
671 m->openInputFile(qFileName, qFile);
672 qFile.seekg(qline.start);
677 int numBarcodes = barcodes.size();
678 TrimOligos* trimOligos = NULL;
679 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
680 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
682 TrimOligos* rtrimOligos = NULL;
684 //create reoriented primer and barcode pairs
685 map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
686 for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
687 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
688 rpairedPrimers[it->first] = tempPair;
689 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
691 for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
692 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
693 rpairedBarcodes[it->first] = tempPair;
694 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
696 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
701 if (m->control_pressed) {
702 delete trimOligos; if (reorient) { delete rtrimOligos; }
703 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
704 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
705 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
706 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
707 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
708 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
712 string trashCode = "";
713 int currentSeqsDiffs = 0;
715 Sequence currSeq(inFASTA); m->gobble(inFASTA);
716 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
717 Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
719 QualityScores currQual; QualityScores savedQual;
721 currQual = QualityScores(qFile); m->gobble(qFile);
722 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
723 //cout << currQual.getName() << endl;
726 string origSeq = currSeq.getUnaligned();
729 int barcodeIndex = 0;
733 success = trimOligos->stripLinker(currSeq, currQual);
734 if(success > ldiffs) { trashCode += 'k'; }
735 else{ currentSeqsDiffs += success; }
739 if(numBarcodes != 0){
740 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
741 if(success > bdiffs) { trashCode += 'b'; }
742 else{ currentSeqsDiffs += success; }
746 success = trimOligos->stripSpacer(currSeq, currQual);
747 if(success > sdiffs) { trashCode += 's'; }
748 else{ currentSeqsDiffs += success; }
752 if(numFPrimers != 0){
753 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
754 if(success > pdiffs) {
755 //if (pairedOligos) { trashCode += trimOligos->getTrashCode(); }
756 //else { trashCode += 'f'; }
759 else{ currentSeqsDiffs += success; }
762 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
764 if(numRPrimers != 0){
765 success = trimOligos->stripReverse(currSeq, currQual);
766 if(!success) { trashCode += 'r'; }
769 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
771 string thisTrashCode = "";
772 int thisCurrentSeqsDiffs = 0;
774 int thisBarcodeIndex = 0;
775 int thisPrimerIndex = 0;
777 if(numBarcodes != 0){
778 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
779 if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
780 else{ thisCurrentSeqsDiffs += thisSuccess; }
783 if(numFPrimers != 0){
784 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
785 if(thisSuccess > pdiffs) {
786 //if (pairedOligos) { thisTrashCode += rtrimOligos->getTrashCode(); }
787 //else { thisTrashCode += 'f'; }
788 thisTrashCode += 'f';
790 else{ thisCurrentSeqsDiffs += thisSuccess; }
793 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
795 if (thisTrashCode == "") {
796 trashCode = thisTrashCode;
797 success = thisSuccess;
798 currentSeqsDiffs = thisCurrentSeqsDiffs;
799 barcodeIndex = thisBarcodeIndex;
800 primerIndex = thisPrimerIndex;
801 savedSeq.reverseComplement();
802 currSeq.setAligned(savedSeq.getAligned());
804 savedQual.flipQScores();
805 currQual.setScores(savedQual.getScores());
811 success = keepFirstTrim(currSeq, currQual);
815 success = removeLastTrim(currSeq, currQual);
816 if(!success) { trashCode += 'l'; }
821 int origLength = currSeq.getNumBases();
823 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
824 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
825 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
826 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
827 else { success = 1; }
829 //you don't want to trim, if it fails above then scrap it
830 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
832 if(!success) { trashCode += 'q'; }
835 if(minLength > 0 || maxLength > 0){
836 success = cullLength(currSeq);
837 if(!success) { trashCode += 'l'; }
840 success = cullHomoP(currSeq);
841 if(!success) { trashCode += 'h'; }
844 success = cullAmbigs(currSeq);
845 if(!success) { trashCode += 'n'; }
848 if(flip){ // should go last
849 currSeq.reverseComplement();
851 currQual.flipQScores();
855 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
857 if(trashCode.length() == 0){
858 string thisGroup = "";
860 if(numBarcodes != 0){
861 thisGroup = barcodeNameVector[barcodeIndex];
862 if (numFPrimers != 0) {
863 if (primerNameVector[primerIndex] != "") {
864 if(thisGroup != "") {
865 thisGroup += "." + primerNameVector[primerIndex];
867 thisGroup = primerNameVector[primerIndex];
874 int pos = thisGroup.find("ignore");
875 if (pos == string::npos) {
876 currSeq.setAligned(currSeq.getUnaligned());
877 currSeq.printSequence(trimFASTAFile);
880 currQual.printQScores(trimQualFile);
885 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
886 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
887 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
890 int numRedundants = 0;
891 if (countfile != "") {
892 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
893 if (itCount != nameCount.end()) {
894 trimCountFile << itCount->first << '\t' << itCount->second << endl;
895 numRedundants = itCount->second-1;
896 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
900 if(numBarcodes != 0){
902 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
904 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
905 else { groupMap[currSeq.getName()] = thisGroup; }
907 if (nameFile != "") {
908 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
909 if (itName != nameMap.end()) {
910 vector<string> thisSeqsNames;
911 m->splitAtChar(itName->second, thisSeqsNames, ',');
912 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
913 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
914 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
916 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
919 map<string, int>::iterator it = groupCounts.find(thisGroup);
920 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
921 else { groupCounts[it->first] += (1 + numRedundants); }
928 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
929 currSeq.printSequence(output);
933 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
934 currQual.printQScores(output);
939 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
940 if (itName != nameMap.end()) {
941 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
942 output << itName->first << '\t' << itName->second << endl;
944 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
950 if(nameFile != ""){ //needs to be before the currSeq name is changed
951 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
952 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
953 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
955 if (countfile != "") {
956 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
957 if (itCount != nameCount.end()) {
958 trimCountFile << itCount->first << '\t' << itCount->second << endl;
959 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
962 currSeq.setName(currSeq.getName() + '|' + trashCode);
963 currSeq.setUnaligned(origSeq);
964 currSeq.setAligned(origSeq);
965 currSeq.printSequence(scrapFASTAFile);
967 currQual.printQScores(scrapQualFile);
973 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
974 unsigned long long pos = inFASTA.tellg();
975 if ((pos == -1) || (pos >= line.end)) { break; }
978 if (inFASTA.eof()) { break; }
982 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
986 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
989 if (reorient) { delete rtrimOligos; }
991 trimFASTAFile.close();
992 scrapFASTAFile.close();
993 if (createGroup) { outGroupsFile.close(); }
994 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
995 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
996 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
1000 catch(exception& e) {
1001 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
1006 /**************************************************************************************************/
1008 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
1012 int exitCommand = 1;
1015 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1016 //loop through and create all the processes you want
1017 while (process != processors) {
1021 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1023 }else if (pid == 0){
1025 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1026 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1027 vector<vector<string> > tempNameFileNames = nameFileNames;
1032 for(int i=0;i<tempFASTAFileNames.size();i++){
1033 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1034 if (tempFASTAFileNames[i][j] != "") {
1035 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1036 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1038 if(qFileName != ""){
1039 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1040 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1043 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1044 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1051 driverCreateTrim(filename,
1053 (trimFASTAFileName + toString(getpid()) + ".temp"),
1054 (scrapFASTAFileName + toString(getpid()) + ".temp"),
1055 (trimQualFileName + toString(getpid()) + ".temp"),
1056 (scrapQualFileName + toString(getpid()) + ".temp"),
1057 (trimNameFileName + toString(getpid()) + ".temp"),
1058 (scrapNameFileName + toString(getpid()) + ".temp"),
1059 (trimCountFileName + toString(getpid()) + ".temp"),
1060 (scrapCountFileName + toString(getpid()) + ".temp"),
1061 (groupFile + toString(getpid()) + ".temp"),
1063 tempPrimerQualFileNames,
1068 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1070 //pass groupCounts to parent
1073 string tempFile = filename + toString(getpid()) + ".num.temp";
1074 m->openOutputFile(tempFile, out);
1076 out << groupCounts.size() << endl;
1078 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1079 out << it->first << '\t' << it->second << endl;
1082 out << groupMap.size() << endl;
1083 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1084 out << it->first << '\t' << it->second << endl;
1090 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1091 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1098 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1099 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1100 if(qFileName != ""){
1101 m->openOutputFile(trimQualFileName, temp); temp.close();
1102 m->openOutputFile(scrapQualFileName, temp); temp.close();
1104 if (nameFile != "") {
1105 m->openOutputFile(trimNameFileName, temp); temp.close();
1106 m->openOutputFile(scrapNameFileName, temp); temp.close();
1108 if (countfile != "") {
1109 m->openOutputFile(trimCountFileName, temp); temp.close();
1110 m->openOutputFile(scrapCountFileName, temp); temp.close();
1113 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1115 //force parent to wait until all the processes are done
1116 for (int i=0;i<processIDS.size();i++) {
1117 int temp = processIDS[i];
1121 //////////////////////////////////////////////////////////////////////////////////////////////////////
1122 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1123 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1124 //////////////////////////////////////////////////////////////////////////////////////////////////////
1126 vector<trimData*> pDataArray;
1127 DWORD dwThreadIdArray[processors-1];
1128 HANDLE hThreadArray[processors-1];
1130 //Create processor worker threads.
1131 for( int h=0; h<processors-1; h++){
1133 string extension = "";
1134 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1135 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1136 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1137 vector<vector<string> > tempNameFileNames = nameFileNames;
1142 for(int i=0;i<tempFASTAFileNames.size();i++){
1143 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1144 if (tempFASTAFileNames[i][j] != "") {
1145 tempFASTAFileNames[i][j] += extension;
1146 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1148 if(qFileName != ""){
1149 tempPrimerQualFileNames[i][j] += extension;
1150 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1153 tempNameFileNames[i][j] += extension;
1154 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1162 trimData* tempTrim = new trimData(filename,
1163 qFileName, nameFile, countfile,
1164 (trimFASTAFileName+extension),
1165 (scrapFASTAFileName+extension),
1166 (trimQualFileName+extension),
1167 (scrapQualFileName+extension),
1168 (trimNameFileName+extension),
1169 (scrapNameFileName+extension),
1170 (trimCountFileName+extension),
1171 (scrapCountFileName+extension),
1172 (groupFile+extension),
1174 tempPrimerQualFileNames,
1176 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1177 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1178 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1179 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1180 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1181 pDataArray.push_back(tempTrim);
1183 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1188 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1189 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1190 if(qFileName != ""){
1191 m->openOutputFile(trimQualFileName, temp); temp.close();
1192 m->openOutputFile(scrapQualFileName, temp); temp.close();
1194 if (nameFile != "") {
1195 m->openOutputFile(trimNameFileName, temp); temp.close();
1196 m->openOutputFile(scrapNameFileName, temp); temp.close();
1198 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1199 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1200 vector<vector<string> > tempNameFileNames = nameFileNames;
1203 string extension = toString(processors-1) + ".temp";
1204 for(int i=0;i<tempFASTAFileNames.size();i++){
1205 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1206 if (tempFASTAFileNames[i][j] != "") {
1207 tempFASTAFileNames[i][j] += extension;
1208 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1210 if(qFileName != ""){
1211 tempPrimerQualFileNames[i][j] += extension;
1212 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1215 tempNameFileNames[i][j] += extension;
1216 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1223 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1224 processIDS.push_back(processors-1);
1227 //Wait until all threads have terminated.
1228 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1230 //Close all thread handles and free memory allocations.
1231 for(int i=0; i < pDataArray.size(); i++){
1232 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1233 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1235 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1236 map<string, int>::iterator it2 = groupCounts.find(it->first);
1237 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1238 else { groupCounts[it->first] += it->second; }
1240 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1241 map<string, string>::iterator it2 = groupMap.find(it->first);
1242 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1243 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1245 CloseHandle(hThreadArray[i]);
1246 delete pDataArray[i];
1253 for(int i=0;i<processIDS.size();i++){
1255 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1257 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1258 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1259 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1260 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1262 if(qFileName != ""){
1263 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1264 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1265 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1266 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1270 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1271 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1272 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1273 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1276 if(countfile != ""){
1277 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1278 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1279 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1280 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1283 if((createGroup)&&(countfile == "")){
1284 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1285 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1290 for(int j=0;j<fastaFileNames.size();j++){
1291 for(int k=0;k<fastaFileNames[j].size();k++){
1292 if (fastaFileNames[j][k] != "") {
1293 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1294 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1296 if(qFileName != ""){
1297 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1298 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1302 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1303 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1310 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1313 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1314 m->openInputFile(tempFile, in);
1318 in >> tempNum; m->gobble(in);
1321 for (int i = 0; i < tempNum; i++) {
1323 in >> group >> groupNum; m->gobble(in);
1325 map<string, int>::iterator it = groupCounts.find(group);
1326 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1327 else { groupCounts[it->first] += groupNum; }
1330 in >> tempNum; m->gobble(in);
1332 for (int i = 0; i < tempNum; i++) {
1333 string group, seqName;
1334 in >> seqName >> group; m->gobble(in);
1336 map<string, string>::iterator it = groupMap.find(seqName);
1337 if (it == groupMap.end()) { groupMap[seqName] = group; }
1338 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1342 in.close(); m->mothurRemove(tempFile);
1349 catch(exception& e) {
1350 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1355 /**************************************************************************************************/
1357 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1360 vector<unsigned long long> fastaFilePos;
1361 vector<unsigned long long> qfileFilePos;
1363 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1364 //set file positions for fasta file
1365 fastaFilePos = m->divideFile(filename, processors);
1367 //get name of first sequence in each chunk
1368 map<string, int> firstSeqNames;
1369 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1371 m->openInputFile(filename, in);
1372 in.seekg(fastaFilePos[i]);
1375 firstSeqNames[temp.getName()] = i;
1380 if(qfilename != "") {
1381 //seach for filePos of each first name in the qfile and save in qfileFilePos
1383 m->openInputFile(qfilename, inQual);
1386 while(!inQual.eof()){
1387 input = m->getline(inQual);
1389 if (input.length() != 0) {
1390 if(input[0] == '>'){ //this is a sequence name line
1391 istringstream nameStream(input);
1393 string sname = ""; nameStream >> sname;
1394 sname = sname.substr(1);
1396 for (int i = 0; i < sname.length(); i++) {
1397 if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
1400 map<string, int>::iterator it = firstSeqNames.find(sname);
1402 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1403 unsigned long long pos = inQual.tellg();
1404 qfileFilePos.push_back(pos - input.length() - 1);
1405 firstSeqNames.erase(it);
1410 if (firstSeqNames.size() == 0) { break; }
1415 if (firstSeqNames.size() != 0) {
1416 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1417 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1423 //get last file position of qfile
1425 unsigned long long size;
1427 //get num bytes in file
1428 pFile = fopen (qfilename.c_str(),"rb");
1429 if (pFile==NULL) perror ("Error opening file");
1431 fseek (pFile, 0, SEEK_END);
1436 qfileFilePos.push_back(size);
1439 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1440 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1441 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1442 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1444 if(qfilename == "") { qLines = lines; } //files with duds
1450 if (processors == 1) { //save time
1451 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1452 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1453 lines.push_back(linePair(0, 1000));
1454 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1456 int numFastaSeqs = 0;
1457 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1458 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1460 if (qfilename != "") {
1461 int numQualSeqs = 0;
1462 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1464 if (numFastaSeqs != numQualSeqs) {
1465 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1469 //figure out how many sequences you have to process
1470 int numSeqsPerProcessor = numFastaSeqs / processors;
1471 for (int i = 0; i < processors; i++) {
1472 int startIndex = i * numSeqsPerProcessor;
1473 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1474 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1475 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1478 if(qfilename == "") { qLines = lines; } //files with duds
1483 catch(exception& e) {
1484 m->errorOut(e, "TrimSeqsCommand", "setLines");
1489 //***************************************************************************************************************
1491 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1494 m->openInputFile(oligoFile, inOligos);
1498 string type, oligo, roligo, group;
1499 bool hasPrimer = false; bool hasPairedBarcodes = false;
1501 int indexPrimer = 0;
1502 int indexBarcode = 0;
1503 int indexPairedPrimer = 0;
1504 int indexPairedBarcode = 0;
1505 set<string> uniquePrimers;
1506 set<string> uniqueBarcodes;
1508 while(!inOligos.eof()){
1512 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1515 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1516 m->gobble(inOligos);
1519 m->gobble(inOligos);
1520 //make type case insensitive
1521 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1525 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1527 for(int i=0;i<oligo.length();i++){
1528 oligo[i] = toupper(oligo[i]);
1529 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1532 if(type == "FORWARD"){
1535 // get rest of line in case there is a primer name
1536 while (!inOligos.eof()) {
1537 char c = inOligos.get();
1538 if (c == 10 || c == 13){ break; }
1539 else if (c == 32 || c == 9){;} //space or tab
1540 else { group += c; }
1543 //check for repeat barcodes
1544 map<string, int>::iterator itPrime = primers.find(oligo);
1545 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1547 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1549 primers[oligo]=indexPrimer; indexPrimer++;
1550 primerNameVector.push_back(group);
1552 else if (type == "PRIMER"){
1553 m->gobble(inOligos);
1557 for(int i=0;i<roligo.length();i++){
1558 roligo[i] = toupper(roligo[i]);
1559 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1561 roligo = reverseOligo(roligo);
1565 // get rest of line in case there is a primer name
1566 while (!inOligos.eof()) {
1567 char c = inOligos.get();
1568 if (c == 10 || c == 13 || c == -1){ break; }
1569 else if (c == 32 || c == 9){;} //space or tab
1570 else { group += c; }
1573 oligosPair newPrimer(oligo, roligo);
1575 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1577 //check for repeat barcodes
1578 string tempPair = oligo+roligo;
1579 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1580 else { uniquePrimers.insert(tempPair); }
1582 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1584 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1585 primerNameVector.push_back(group);
1588 else if(type == "REVERSE"){
1589 //Sequence oligoRC("reverse", oligo);
1590 //oligoRC.reverseComplement();
1591 string oligoRC = reverseOligo(oligo);
1592 revPrimer.push_back(oligoRC);
1594 else if(type == "BARCODE"){
1597 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1598 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1601 while (!inOligos.eof()) {
1602 char c = inOligos.get();
1603 if (c == 10 || c == 13 || c == -1){ break; }
1604 else if (c == 32 || c == 9){;} //space or tab
1608 //then this is illumina data with 4 columns
1610 hasPairedBarcodes = true;
1611 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1614 for(int i=0;i<reverseBarcode.length();i++){
1615 reverseBarcode[i] = toupper(reverseBarcode[i]);
1616 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1619 reverseBarcode = reverseOligo(reverseBarcode);
1620 oligosPair newPair(oligo, reverseBarcode);
1622 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1624 //check for repeat barcodes
1625 string tempPair = oligo+reverseBarcode;
1626 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1627 else { uniqueBarcodes.insert(tempPair); }
1629 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1630 barcodeNameVector.push_back(group);
1632 //check for repeat barcodes
1633 map<string, int>::iterator itBar = barcodes.find(oligo);
1634 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1636 barcodes[oligo]=indexBarcode; indexBarcode++;
1637 barcodeNameVector.push_back(group);
1639 }else if(type == "LINKER"){
1640 linker.push_back(oligo);
1641 }else if(type == "SPACER"){
1642 spacer.push_back(oligo);
1644 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1646 m->gobble(inOligos);
1650 if (hasPairedBarcodes || hasPrimer) {
1651 pairedOligos = true;
1652 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1655 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1657 //add in potential combos
1658 if(barcodeNameVector.size() == 0){
1660 barcodeNameVector.push_back("");
1663 if(primerNameVector.size() == 0){
1665 primerNameVector.push_back("");
1668 fastaFileNames.resize(barcodeNameVector.size());
1669 for(int i=0;i<fastaFileNames.size();i++){
1670 fastaFileNames[i].assign(primerNameVector.size(), "");
1672 if(qFileName != "") { qualFileNames = fastaFileNames; }
1673 if(nameFile != "") { nameFileNames = fastaFileNames; }
1676 set<string> uniqueNames; //used to cleanup outputFileNames
1678 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1679 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1681 string primerName = primerNameVector[itPrimer->first];
1682 string barcodeName = barcodeNameVector[itBar->first];
1684 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1686 string comboGroupName = "";
1687 string fastaFileName = "";
1688 string qualFileName = "";
1689 string nameFileName = "";
1690 string countFileName = "";
1692 if(primerName == ""){
1693 comboGroupName = barcodeNameVector[itBar->first];
1696 if(barcodeName == ""){
1697 comboGroupName = primerNameVector[itPrimer->first];
1700 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1706 map<string, string> variables;
1707 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1708 variables["[tag]"] = comboGroupName;
1709 fastaFileName = getOutputFileName("fasta", variables);
1710 if (uniqueNames.count(fastaFileName) == 0) {
1711 outputNames.push_back(fastaFileName);
1712 outputTypes["fasta"].push_back(fastaFileName);
1713 uniqueNames.insert(fastaFileName);
1716 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1717 m->openOutputFile(fastaFileName, temp); temp.close();
1719 if(qFileName != ""){
1720 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1721 qualFileName = getOutputFileName("qfile", variables);
1722 if (uniqueNames.count(qualFileName) == 0) {
1723 outputNames.push_back(qualFileName);
1724 outputTypes["qfile"].push_back(qualFileName);
1727 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1728 m->openOutputFile(qualFileName, temp); temp.close();
1732 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1733 nameFileName = getOutputFileName("name", variables);
1734 if (uniqueNames.count(nameFileName) == 0) {
1735 outputNames.push_back(nameFileName);
1736 outputTypes["name"].push_back(nameFileName);
1739 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1740 m->openOutputFile(nameFileName, temp); temp.close();
1746 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1747 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1749 string primerName = primerNameVector[itPrimer->second];
1750 string barcodeName = barcodeNameVector[itBar->second];
1752 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1754 string comboGroupName = "";
1755 string fastaFileName = "";
1756 string qualFileName = "";
1757 string nameFileName = "";
1758 string countFileName = "";
1760 if(primerName == ""){
1761 comboGroupName = barcodeNameVector[itBar->second];
1764 if(barcodeName == ""){
1765 comboGroupName = primerNameVector[itPrimer->second];
1768 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1774 map<string, string> variables;
1775 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1776 variables["[tag]"] = comboGroupName;
1777 fastaFileName = getOutputFileName("fasta", variables);
1778 if (uniqueNames.count(fastaFileName) == 0) {
1779 outputNames.push_back(fastaFileName);
1780 outputTypes["fasta"].push_back(fastaFileName);
1781 uniqueNames.insert(fastaFileName);
1784 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1785 m->openOutputFile(fastaFileName, temp); temp.close();
1787 if(qFileName != ""){
1788 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1789 qualFileName = getOutputFileName("qfile", variables);
1790 if (uniqueNames.count(qualFileName) == 0) {
1791 outputNames.push_back(qualFileName);
1792 outputTypes["qfile"].push_back(qualFileName);
1795 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1796 m->openOutputFile(qualFileName, temp); temp.close();
1800 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1801 nameFileName = getOutputFileName("name", variables);
1802 if (uniqueNames.count(nameFileName) == 0) {
1803 outputNames.push_back(nameFileName);
1804 outputTypes["name"].push_back(nameFileName);
1807 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1808 m->openOutputFile(nameFileName, temp); temp.close();
1815 numFPrimers = primers.size();
1816 if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
1817 numRPrimers = revPrimer.size();
1818 numLinkers = linker.size();
1819 numSpacers = spacer.size();
1821 bool allBlank = true;
1822 for (int i = 0; i < barcodeNameVector.size(); i++) {
1823 if (barcodeNameVector[i] != "") {
1828 for (int i = 0; i < primerNameVector.size(); i++) {
1829 if (primerNameVector[i] != "") {
1836 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1844 catch(exception& e) {
1845 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1849 //***************************************************************************************************************
1851 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1854 if(qscores.getName() != ""){
1855 qscores.trimQScores(-1, keepFirst);
1857 sequence.trim(keepFirst);
1860 catch(exception& e) {
1861 m->errorOut(e, "keepFirstTrim", "countDiffs");
1867 //***************************************************************************************************************
1869 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1873 int length = sequence.getNumBases() - removeLast;
1876 if(qscores.getName() != ""){
1877 qscores.trimQScores(-1, length);
1879 sequence.trim(length);
1888 catch(exception& e) {
1889 m->errorOut(e, "removeLastTrim", "countDiffs");
1895 //***************************************************************************************************************
1897 bool TrimSeqsCommand::cullLength(Sequence& seq){
1900 int length = seq.getNumBases();
1901 bool success = 0; //guilty until proven innocent
1903 if(length >= minLength && maxLength == 0) { success = 1; }
1904 else if(length >= minLength && length <= maxLength) { success = 1; }
1905 else { success = 0; }
1910 catch(exception& e) {
1911 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1917 //***************************************************************************************************************
1919 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1921 int longHomoP = seq.getLongHomoPolymer();
1922 bool success = 0; //guilty until proven innocent
1924 if(longHomoP <= maxHomoP){ success = 1; }
1925 else { success = 0; }
1929 catch(exception& e) {
1930 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1935 //********************************************************************/
1936 string TrimSeqsCommand::reverseOligo(string oligo){
1938 string reverse = "";
1940 for(int i=oligo.length()-1;i>=0;i--){
1942 if(oligo[i] == 'A') { reverse += 'T'; }
1943 else if(oligo[i] == 'T'){ reverse += 'A'; }
1944 else if(oligo[i] == 'U'){ reverse += 'A'; }
1946 else if(oligo[i] == 'G'){ reverse += 'C'; }
1947 else if(oligo[i] == 'C'){ reverse += 'G'; }
1949 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1950 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1952 else if(oligo[i] == 'M'){ reverse += 'K'; }
1953 else if(oligo[i] == 'K'){ reverse += 'M'; }
1955 else if(oligo[i] == 'W'){ reverse += 'W'; }
1956 else if(oligo[i] == 'S'){ reverse += 'S'; }
1958 else if(oligo[i] == 'B'){ reverse += 'V'; }
1959 else if(oligo[i] == 'V'){ reverse += 'B'; }
1961 else if(oligo[i] == 'D'){ reverse += 'H'; }
1962 else if(oligo[i] == 'H'){ reverse += 'D'; }
1964 else { reverse += 'N'; }
1970 catch(exception& e) {
1971 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1976 //***************************************************************************************************************
1978 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1980 int numNs = seq.getAmbigBases();
1981 bool success = 0; //guilty until proven innocent
1983 if(numNs <= maxAmbig) { success = 1; }
1984 else { success = 0; }
1988 catch(exception& e) {
1989 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1994 //***************************************************************************************************************