5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "TrimSeqsCommand", "setParameters");
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65 helpString += "The fasta parameter is required.\n";
66 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67 helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80 helpString += "The qfile parameter allows you to provide a quality file.\n";
81 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92 helpString += "The trim.seqs command should be in the following format: \n";
93 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
94 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
95 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
100 catch(exception& e) {
101 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
110 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
111 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
112 else if (type == "group") { pattern = "[filename],groups"; }
113 else if (type == "name") { pattern = "[filename],[tag],names"; }
114 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
124 //**********************************************************************************************************************
126 TrimSeqsCommand::TrimSeqsCommand(){
128 abort = true; calledHelp = true;
130 vector<string> tempOutNames;
131 outputTypes["fasta"] = tempOutNames;
132 outputTypes["qfile"] = tempOutNames;
133 outputTypes["group"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["count"] = tempOutNames;
137 catch(exception& e) {
138 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
142 //***************************************************************************************************************
144 TrimSeqsCommand::TrimSeqsCommand(string option) {
147 abort = false; calledHelp = false;
150 //allow user to run help
151 if(option == "help") { help(); abort = true; calledHelp = true; }
152 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
155 vector<string> myArray = setParameters();
157 OptionParser parser(option);
158 map<string,string> parameters = parser.getParameters();
160 ValidParameters validParameter;
161 map<string,string>::iterator it;
163 //check to make sure all parameters are valid for command
164 for (it = parameters.begin(); it != parameters.end(); it++) {
165 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
168 //initialize outputTypes
169 vector<string> tempOutNames;
170 outputTypes["fasta"] = tempOutNames;
171 outputTypes["qfile"] = tempOutNames;
172 outputTypes["group"] = tempOutNames;
173 outputTypes["name"] = tempOutNames;
174 outputTypes["count"] = tempOutNames;
176 //if the user changes the input directory command factory will send this info to us in the output parameter
177 string inputDir = validParameter.validFile(parameters, "inputdir", false);
178 if (inputDir == "not found"){ inputDir = ""; }
181 it = parameters.find("fasta");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["fasta"] = inputDir + it->second; }
189 it = parameters.find("oligos");
190 //user has given a template file
191 if(it != parameters.end()){
192 path = m->hasPath(it->second);
193 //if the user has not given a path then, add inputdir. else leave path alone.
194 if (path == "") { parameters["oligos"] = inputDir + it->second; }
197 it = parameters.find("qfile");
198 //user has given a template file
199 if(it != parameters.end()){
200 path = m->hasPath(it->second);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { parameters["qfile"] = inputDir + it->second; }
205 it = parameters.find("name");
206 //user has given a template file
207 if(it != parameters.end()){
208 path = m->hasPath(it->second);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { parameters["name"] = inputDir + it->second; }
213 it = parameters.find("count");
214 //user has given a template file
215 if(it != parameters.end()){
216 path = m->hasPath(it->second);
217 //if the user has not given a path then, add inputdir. else leave path alone.
218 if (path == "") { parameters["count"] = inputDir + it->second; }
224 //check for required parameters
225 fastaFile = validParameter.validFile(parameters, "fasta", true);
226 if (fastaFile == "not found") {
227 fastaFile = m->getFastaFile();
228 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230 }else if (fastaFile == "not open") { abort = true; }
231 else { m->setFastaFile(fastaFile); }
233 //if the user changes the output directory command factory will send this info to us in the output parameter
234 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
236 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
240 //check for optional parameter and set defaults
241 // ...at some point should added some additional type checking...
243 temp = validParameter.validFile(parameters, "flip", false);
244 if (temp == "not found") { flip = 0; }
245 else { flip = m->isTrue(temp); }
247 temp = validParameter.validFile(parameters, "oligos", true);
248 if (temp == "not found"){ oligoFile = ""; }
249 else if(temp == "not open"){ abort = true; }
250 else { oligoFile = temp; m->setOligosFile(oligoFile); }
253 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
254 m->mothurConvert(temp, maxAmbig);
256 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
257 m->mothurConvert(temp, maxHomoP);
259 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
260 m->mothurConvert(temp, minLength);
262 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
263 m->mothurConvert(temp, maxLength);
265 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
266 m->mothurConvert(temp, bdiffs);
268 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
269 m->mothurConvert(temp, pdiffs);
271 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
272 m->mothurConvert(temp, ldiffs);
274 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
275 m->mothurConvert(temp, sdiffs);
277 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
278 m->mothurConvert(temp, tdiffs);
280 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
282 temp = validParameter.validFile(parameters, "qfile", true);
283 if (temp == "not found") { qFileName = ""; }
284 else if(temp == "not open") { abort = true; }
285 else { qFileName = temp; m->setQualFile(qFileName); }
287 temp = validParameter.validFile(parameters, "name", true);
288 if (temp == "not found") { nameFile = ""; }
289 else if(temp == "not open") { nameFile = ""; abort = true; }
290 else { nameFile = temp; m->setNameFile(nameFile); }
292 countfile = validParameter.validFile(parameters, "count", true);
293 if (countfile == "not open") { abort = true; countfile = ""; }
294 else if (countfile == "not found") { countfile = ""; }
295 else { m->setCountTableFile(countfile); }
297 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
299 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
300 m->mothurConvert(temp, qThreshold);
302 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
303 qtrim = m->isTrue(temp);
305 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
306 convert(temp, qRollAverage);
308 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309 convert(temp, qWindowAverage);
311 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
312 convert(temp, qWindowSize);
314 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
315 convert(temp, qWindowStep);
317 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
318 convert(temp, qAverage);
320 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
321 convert(temp, keepFirst);
323 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
324 convert(temp, removeLast);
326 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
327 allFiles = m->isTrue(temp);
329 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
330 keepforward = m->isTrue(temp);
332 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
333 reorient = m->isTrue(temp);
335 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
336 m->setProcessors(temp);
337 m->mothurConvert(temp, processors);
340 if(allFiles && (oligoFile == "")){
341 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
343 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
348 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
349 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
353 if (countfile == "") {
354 if (nameFile == "") {
355 vector<string> files; files.push_back(fastaFile);
356 parser.getNameFile(files);
362 catch(exception& e) {
363 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
367 //***************************************************************************************************************
369 int TrimSeqsCommand::execute(){
372 if (abort == true) { if (calledHelp) { return 0; } return 2; }
374 pairedOligos = false;
375 numFPrimers = 0; //this needs to be initialized
380 vector<vector<string> > fastaFileNames;
381 vector<vector<string> > qualFileNames;
382 vector<vector<string> > nameFileNames;
384 map<string, string> variables;
385 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386 variables["[tag]"] = "trim";
387 string trimSeqFile = getOutputFileName("fasta",variables);
388 string trimQualFile = getOutputFileName("qfile",variables);
389 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
391 variables["[tag]"] = "scrap";
392 string scrapSeqFile = getOutputFileName("fasta",variables);
393 string scrapQualFile = getOutputFileName("qfile",variables);
394 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
396 if (qFileName != "") {
397 outputNames.push_back(trimQualFile);
398 outputNames.push_back(scrapQualFile);
399 outputTypes["qfile"].push_back(trimQualFile);
400 outputTypes["qfile"].push_back(scrapQualFile);
403 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404 variables["[tag]"] = "trim";
405 string trimNameFile = getOutputFileName("name",variables);
406 variables["[tag]"] = "scrap";
407 string scrapNameFile = getOutputFileName("name",variables);
409 if (nameFile != "") {
410 m->readNames(nameFile, nameMap);
411 outputNames.push_back(trimNameFile);
412 outputNames.push_back(scrapNameFile);
413 outputTypes["name"].push_back(trimNameFile);
414 outputTypes["name"].push_back(scrapNameFile);
417 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418 variables["[tag]"] = "trim";
419 string trimCountFile = getOutputFileName("count",variables);
420 variables["[tag]"] = "scrap";
421 string scrapCountFile = getOutputFileName("count",variables);
423 if (countfile != "") {
425 ct.readTable(countfile);
426 nameCount = ct.getNameMap();
427 outputNames.push_back(trimCountFile);
428 outputNames.push_back(scrapCountFile);
429 outputTypes["count"].push_back(trimCountFile);
430 outputTypes["count"].push_back(scrapCountFile);
434 if (m->control_pressed) { return 0; }
436 string outputGroupFileName;
438 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439 if ((createGroup) && (countfile == "")){
440 map<string, string> myvariables;
441 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442 outputGroupFileName = getOutputFileName("group",myvariables);
443 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
447 if (m->control_pressed) { return 0; }
449 //fills lines and qlines
450 setLines(fastaFile, qFileName);
453 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
455 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
459 if (m->control_pressed) { return 0; }
462 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
463 map<string, string>::iterator it;
464 set<string> namesToRemove;
465 for(int i=0;i<fastaFileNames.size();i++){
466 for(int j=0;j<fastaFileNames[0].size();j++){
467 if (fastaFileNames[i][j] != "") {
468 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
469 if(m->isBlank(fastaFileNames[i][j])){
470 m->mothurRemove(fastaFileNames[i][j]);
471 namesToRemove.insert(fastaFileNames[i][j]);
474 m->mothurRemove(qualFileNames[i][j]);
475 namesToRemove.insert(qualFileNames[i][j]);
479 m->mothurRemove(nameFileNames[i][j]);
480 namesToRemove.insert(nameFileNames[i][j]);
483 it = uniqueFastaNames.find(fastaFileNames[i][j]);
484 if (it == uniqueFastaNames.end()) {
485 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
493 //remove names for outputFileNames, just cleans up the output
494 vector<string> outputNames2;
495 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
496 outputNames = outputNames2;
498 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
500 m->openInputFile(it->first, in);
503 map<string, string> myvariables;
504 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
505 string thisGroupName = "";
506 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
507 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
508 m->openOutputFile(thisGroupName, out);
510 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
513 if (m->control_pressed) { break; }
515 Sequence currSeq(in); m->gobble(in);
516 if (countfile == "") {
517 out << currSeq.getName() << '\t' << it->second << endl;
519 if (nameFile != "") {
520 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
521 if (itName != nameMap.end()) {
522 vector<string> thisSeqsNames;
523 m->splitAtChar(itName->second, thisSeqsNames, ',');
524 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
525 out << thisSeqsNames[k] << '\t' << it->second << endl;
527 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
530 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
531 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
532 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
539 if (countfile != "") { //create countfile with group info included
540 CountTable* ct = new CountTable();
541 ct->readTable(trimCountFile);
542 map<string, int> justTrimmedNames = ct->getNameMap();
546 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
547 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
548 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
549 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
550 map<string, string>::iterator it2 = groupMap.find(itNames->first);
551 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
552 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
554 newCt.printTable(trimCountFile);
558 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
560 //output group counts
561 m->mothurOutEndLine();
563 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
564 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
565 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
567 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
569 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
571 //set fasta file as new current fastafile
573 itTypes = outputTypes.find("fasta");
574 if (itTypes != outputTypes.end()) {
575 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
578 itTypes = outputTypes.find("name");
579 if (itTypes != outputTypes.end()) {
580 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
583 itTypes = outputTypes.find("qfile");
584 if (itTypes != outputTypes.end()) {
585 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
588 itTypes = outputTypes.find("group");
589 if (itTypes != outputTypes.end()) {
590 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
593 itTypes = outputTypes.find("count");
594 if (itTypes != outputTypes.end()) {
595 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
598 m->mothurOutEndLine();
599 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
600 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
601 m->mothurOutEndLine();
606 catch(exception& e) {
607 m->errorOut(e, "TrimSeqsCommand", "execute");
612 /**************************************************************************************/
613 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
617 ofstream trimFASTAFile;
618 m->openOutputFile(trimFileName, trimFASTAFile);
620 ofstream scrapFASTAFile;
621 m->openOutputFile(scrapFileName, scrapFASTAFile);
623 ofstream trimQualFile;
624 ofstream scrapQualFile;
626 m->openOutputFile(trimQFileName, trimQualFile);
627 m->openOutputFile(scrapQFileName, scrapQualFile);
630 ofstream trimNameFile;
631 ofstream scrapNameFile;
633 m->openOutputFile(trimNFileName, trimNameFile);
634 m->openOutputFile(scrapNFileName, scrapNameFile);
637 ofstream trimCountFile;
638 ofstream scrapCountFile;
640 m->openOutputFile(trimCFileName, trimCountFile);
641 m->openOutputFile(scrapCFileName, scrapCountFile);
642 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
645 ofstream outGroupsFile;
646 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
648 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
649 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
650 if (fastaFileNames[i][j] != "") {
652 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
654 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
658 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
666 m->openInputFile(filename, inFASTA);
667 inFASTA.seekg(line.start);
670 if(qFileName != "") {
671 m->openInputFile(qFileName, qFile);
672 qFile.seekg(qline.start);
677 int numBarcodes = barcodes.size();
678 TrimOligos* trimOligos = NULL;
679 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
680 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
682 TrimOligos* rtrimOligos = NULL;
684 //create reoriented primer and barcode pairs
685 map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
686 for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
687 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
688 rpairedPrimers[it->first] = tempPair;
690 for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
691 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
692 rpairedBarcodes[it->first] = tempPair;
694 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
699 if (m->control_pressed) {
700 delete trimOligos; if (reorient) { delete rtrimOligos; }
701 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
702 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
703 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
704 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
705 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
706 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
710 string trashCode = "";
711 int currentSeqsDiffs = 0;
713 Sequence currSeq(inFASTA); m->gobble(inFASTA);
714 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
716 QualityScores currQual;
718 currQual = QualityScores(qFile); m->gobble(qFile);
719 //cout << currQual.getName() << endl;
722 string origSeq = currSeq.getUnaligned();
725 int barcodeIndex = 0;
729 success = trimOligos->stripLinker(currSeq, currQual);
730 if(success > ldiffs) { trashCode += 'k'; }
731 else{ currentSeqsDiffs += success; }
735 if(numBarcodes != 0){
736 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
737 if(success > bdiffs) { trashCode += 'b'; }
738 else{ currentSeqsDiffs += success; }
742 success = trimOligos->stripSpacer(currSeq, currQual);
743 if(success > sdiffs) { trashCode += 's'; }
744 else{ currentSeqsDiffs += success; }
748 if(numFPrimers != 0){
749 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
750 if(success > pdiffs) { trashCode += 'f'; }
751 else{ currentSeqsDiffs += success; }
754 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
756 if(numRPrimers != 0){
757 success = trimOligos->stripReverse(currSeq, currQual);
758 if(!success) { trashCode += 'r'; }
761 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
763 string thisTrashCode = "";
764 int thisCurrentSeqsDiffs = 0;
766 int thisBarcodeIndex = 0;
767 int thisPrimerIndex = 0;
769 if(numBarcodes != 0){
770 thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
771 if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
772 else{ thisCurrentSeqsDiffs += thisSuccess; }
775 if(numFPrimers != 0){
776 thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
777 if(thisSuccess > pdiffs) { thisTrashCode += 'f'; }
778 else{ thisCurrentSeqsDiffs += thisSuccess; }
781 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
783 if (thisTrashCode == "") {
784 trashCode = thisTrashCode;
785 success = thisSuccess;
786 currentSeqsDiffs = thisCurrentSeqsDiffs;
787 barcodeIndex = thisBarcodeIndex;
788 primerIndex = thisPrimerIndex;
789 currSeq.reverseComplement();
791 currQual.flipQScores();
797 success = keepFirstTrim(currSeq, currQual);
801 success = removeLastTrim(currSeq, currQual);
802 if(!success) { trashCode += 'l'; }
807 int origLength = currSeq.getNumBases();
809 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
810 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
811 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
812 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
813 else { success = 1; }
815 //you don't want to trim, if it fails above then scrap it
816 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
818 if(!success) { trashCode += 'q'; }
821 if(minLength > 0 || maxLength > 0){
822 success = cullLength(currSeq);
823 if(!success) { trashCode += 'l'; }
826 success = cullHomoP(currSeq);
827 if(!success) { trashCode += 'h'; }
830 success = cullAmbigs(currSeq);
831 if(!success) { trashCode += 'n'; }
834 if(flip){ // should go last
835 currSeq.reverseComplement();
837 currQual.flipQScores();
841 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
843 if(trashCode.length() == 0){
844 string thisGroup = "";
846 if(numBarcodes != 0){
847 thisGroup = barcodeNameVector[barcodeIndex];
848 if (numFPrimers != 0) {
849 if (primerNameVector[primerIndex] != "") {
850 if(thisGroup != "") {
851 thisGroup += "." + primerNameVector[primerIndex];
853 thisGroup = primerNameVector[primerIndex];
860 int pos = thisGroup.find("ignore");
861 if (pos == string::npos) {
862 currSeq.setAligned(currSeq.getUnaligned());
863 currSeq.printSequence(trimFASTAFile);
866 currQual.printQScores(trimQualFile);
871 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
872 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
873 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
876 int numRedundants = 0;
877 if (countfile != "") {
878 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
879 if (itCount != nameCount.end()) {
880 trimCountFile << itCount->first << '\t' << itCount->second << endl;
881 numRedundants = itCount->second-1;
882 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
886 if(numBarcodes != 0){
888 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
890 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
891 else { groupMap[currSeq.getName()] = thisGroup; }
893 if (nameFile != "") {
894 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
895 if (itName != nameMap.end()) {
896 vector<string> thisSeqsNames;
897 m->splitAtChar(itName->second, thisSeqsNames, ',');
898 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
899 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
900 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
902 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
905 map<string, int>::iterator it = groupCounts.find(thisGroup);
906 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
907 else { groupCounts[it->first] += (1 + numRedundants); }
914 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
915 currSeq.printSequence(output);
919 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
920 currQual.printQScores(output);
925 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
926 if (itName != nameMap.end()) {
927 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
928 output << itName->first << '\t' << itName->second << endl;
930 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
936 if(nameFile != ""){ //needs to be before the currSeq name is changed
937 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
938 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
939 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
941 if (countfile != "") {
942 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
943 if (itCount != nameCount.end()) {
944 trimCountFile << itCount->first << '\t' << itCount->second << endl;
945 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
948 currSeq.setName(currSeq.getName() + '|' + trashCode);
949 currSeq.setUnaligned(origSeq);
950 currSeq.setAligned(origSeq);
951 currSeq.printSequence(scrapFASTAFile);
953 currQual.printQScores(scrapQualFile);
959 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
960 unsigned long long pos = inFASTA.tellg();
961 if ((pos == -1) || (pos >= line.end)) { break; }
964 if (inFASTA.eof()) { break; }
968 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
972 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
975 if (reorient) { delete rtrimOligos; }
977 trimFASTAFile.close();
978 scrapFASTAFile.close();
979 if (createGroup) { outGroupsFile.close(); }
980 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
981 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
982 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
986 catch(exception& e) {
987 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
992 /**************************************************************************************************/
994 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
1001 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1002 //loop through and create all the processes you want
1003 while (process != processors) {
1007 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1009 }else if (pid == 0){
1011 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1012 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1013 vector<vector<string> > tempNameFileNames = nameFileNames;
1018 for(int i=0;i<tempFASTAFileNames.size();i++){
1019 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1020 if (tempFASTAFileNames[i][j] != "") {
1021 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1022 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1024 if(qFileName != ""){
1025 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1026 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1029 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1030 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1037 driverCreateTrim(filename,
1039 (trimFASTAFileName + toString(getpid()) + ".temp"),
1040 (scrapFASTAFileName + toString(getpid()) + ".temp"),
1041 (trimQualFileName + toString(getpid()) + ".temp"),
1042 (scrapQualFileName + toString(getpid()) + ".temp"),
1043 (trimNameFileName + toString(getpid()) + ".temp"),
1044 (scrapNameFileName + toString(getpid()) + ".temp"),
1045 (trimCountFileName + toString(getpid()) + ".temp"),
1046 (scrapCountFileName + toString(getpid()) + ".temp"),
1047 (groupFile + toString(getpid()) + ".temp"),
1049 tempPrimerQualFileNames,
1054 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1056 //pass groupCounts to parent
1059 string tempFile = filename + toString(getpid()) + ".num.temp";
1060 m->openOutputFile(tempFile, out);
1062 out << groupCounts.size() << endl;
1064 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1065 out << it->first << '\t' << it->second << endl;
1068 out << groupMap.size() << endl;
1069 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1070 out << it->first << '\t' << it->second << endl;
1076 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1077 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1084 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1085 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1086 if(qFileName != ""){
1087 m->openOutputFile(trimQualFileName, temp); temp.close();
1088 m->openOutputFile(scrapQualFileName, temp); temp.close();
1090 if (nameFile != "") {
1091 m->openOutputFile(trimNameFileName, temp); temp.close();
1092 m->openOutputFile(scrapNameFileName, temp); temp.close();
1094 if (countfile != "") {
1095 m->openOutputFile(trimCountFileName, temp); temp.close();
1096 m->openOutputFile(scrapCountFileName, temp); temp.close();
1099 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1101 //force parent to wait until all the processes are done
1102 for (int i=0;i<processIDS.size();i++) {
1103 int temp = processIDS[i];
1107 //////////////////////////////////////////////////////////////////////////////////////////////////////
1108 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1109 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1110 //////////////////////////////////////////////////////////////////////////////////////////////////////
1112 vector<trimData*> pDataArray;
1113 DWORD dwThreadIdArray[processors-1];
1114 HANDLE hThreadArray[processors-1];
1116 //Create processor worker threads.
1117 for( int h=0; h<processors-1; h++){
1119 string extension = "";
1120 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1121 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1122 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1123 vector<vector<string> > tempNameFileNames = nameFileNames;
1128 for(int i=0;i<tempFASTAFileNames.size();i++){
1129 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1130 if (tempFASTAFileNames[i][j] != "") {
1131 tempFASTAFileNames[i][j] += extension;
1132 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1134 if(qFileName != ""){
1135 tempPrimerQualFileNames[i][j] += extension;
1136 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1139 tempNameFileNames[i][j] += extension;
1140 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1148 trimData* tempTrim = new trimData(filename,
1149 qFileName, nameFile, countfile,
1150 (trimFASTAFileName+extension),
1151 (scrapFASTAFileName+extension),
1152 (trimQualFileName+extension),
1153 (scrapQualFileName+extension),
1154 (trimNameFileName+extension),
1155 (scrapNameFileName+extension),
1156 (trimCountFileName+extension),
1157 (scrapCountFileName+extension),
1158 (groupFile+extension),
1160 tempPrimerQualFileNames,
1162 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1163 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1164 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1165 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1166 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1167 pDataArray.push_back(tempTrim);
1169 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1174 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1175 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1176 if(qFileName != ""){
1177 m->openOutputFile(trimQualFileName, temp); temp.close();
1178 m->openOutputFile(scrapQualFileName, temp); temp.close();
1180 if (nameFile != "") {
1181 m->openOutputFile(trimNameFileName, temp); temp.close();
1182 m->openOutputFile(scrapNameFileName, temp); temp.close();
1184 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1185 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1186 vector<vector<string> > tempNameFileNames = nameFileNames;
1189 string extension = toString(processors-1) + ".temp";
1190 for(int i=0;i<tempFASTAFileNames.size();i++){
1191 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1192 if (tempFASTAFileNames[i][j] != "") {
1193 tempFASTAFileNames[i][j] += extension;
1194 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1196 if(qFileName != ""){
1197 tempPrimerQualFileNames[i][j] += extension;
1198 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1201 tempNameFileNames[i][j] += extension;
1202 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1209 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1210 processIDS.push_back(processors-1);
1213 //Wait until all threads have terminated.
1214 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1216 //Close all thread handles and free memory allocations.
1217 for(int i=0; i < pDataArray.size(); i++){
1218 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1219 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1221 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1222 map<string, int>::iterator it2 = groupCounts.find(it->first);
1223 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1224 else { groupCounts[it->first] += it->second; }
1226 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1227 map<string, string>::iterator it2 = groupMap.find(it->first);
1228 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1229 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1231 CloseHandle(hThreadArray[i]);
1232 delete pDataArray[i];
1239 for(int i=0;i<processIDS.size();i++){
1241 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1243 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1244 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1245 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1246 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1248 if(qFileName != ""){
1249 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1250 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1251 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1252 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1256 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1257 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1258 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1259 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1262 if(countfile != ""){
1263 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1264 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1265 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1266 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1269 if((createGroup)&&(countfile == "")){
1270 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1271 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1276 for(int j=0;j<fastaFileNames.size();j++){
1277 for(int k=0;k<fastaFileNames[j].size();k++){
1278 if (fastaFileNames[j][k] != "") {
1279 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1280 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1282 if(qFileName != ""){
1283 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1284 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1288 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1289 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1296 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1299 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1300 m->openInputFile(tempFile, in);
1304 in >> tempNum; m->gobble(in);
1307 for (int i = 0; i < tempNum; i++) {
1309 in >> group >> groupNum; m->gobble(in);
1311 map<string, int>::iterator it = groupCounts.find(group);
1312 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1313 else { groupCounts[it->first] += groupNum; }
1316 in >> tempNum; m->gobble(in);
1318 for (int i = 0; i < tempNum; i++) {
1319 string group, seqName;
1320 in >> seqName >> group; m->gobble(in);
1322 map<string, string>::iterator it = groupMap.find(seqName);
1323 if (it == groupMap.end()) { groupMap[seqName] = group; }
1324 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1328 in.close(); m->mothurRemove(tempFile);
1335 catch(exception& e) {
1336 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1341 /**************************************************************************************************/
1343 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1346 vector<unsigned long long> fastaFilePos;
1347 vector<unsigned long long> qfileFilePos;
1349 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1350 //set file positions for fasta file
1351 fastaFilePos = m->divideFile(filename, processors);
1353 //get name of first sequence in each chunk
1354 map<string, int> firstSeqNames;
1355 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1357 m->openInputFile(filename, in);
1358 in.seekg(fastaFilePos[i]);
1361 firstSeqNames[temp.getName()] = i;
1366 if(qfilename != "") {
1367 //seach for filePos of each first name in the qfile and save in qfileFilePos
1369 m->openInputFile(qfilename, inQual);
1372 while(!inQual.eof()){
1373 input = m->getline(inQual);
1375 if (input.length() != 0) {
1376 if(input[0] == '>'){ //this is a sequence name line
1377 istringstream nameStream(input);
1379 string sname = ""; nameStream >> sname;
1380 sname = sname.substr(1);
1382 for (int i = 0; i < sname.length(); i++) {
1383 if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
1386 map<string, int>::iterator it = firstSeqNames.find(sname);
1388 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1389 unsigned long long pos = inQual.tellg();
1390 qfileFilePos.push_back(pos - input.length() - 1);
1391 firstSeqNames.erase(it);
1396 if (firstSeqNames.size() == 0) { break; }
1401 if (firstSeqNames.size() != 0) {
1402 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1403 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1409 //get last file position of qfile
1411 unsigned long long size;
1413 //get num bytes in file
1414 pFile = fopen (qfilename.c_str(),"rb");
1415 if (pFile==NULL) perror ("Error opening file");
1417 fseek (pFile, 0, SEEK_END);
1422 qfileFilePos.push_back(size);
1425 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1426 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1427 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1428 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1430 if(qfilename == "") { qLines = lines; } //files with duds
1436 if (processors == 1) { //save time
1437 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1438 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1439 lines.push_back(linePair(0, 1000));
1440 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1442 int numFastaSeqs = 0;
1443 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1444 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1446 if (qfilename != "") {
1447 int numQualSeqs = 0;
1448 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1450 if (numFastaSeqs != numQualSeqs) {
1451 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1455 //figure out how many sequences you have to process
1456 int numSeqsPerProcessor = numFastaSeqs / processors;
1457 for (int i = 0; i < processors; i++) {
1458 int startIndex = i * numSeqsPerProcessor;
1459 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1460 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1461 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1464 if(qfilename == "") { qLines = lines; } //files with duds
1469 catch(exception& e) {
1470 m->errorOut(e, "TrimSeqsCommand", "setLines");
1475 //***************************************************************************************************************
1477 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1480 m->openInputFile(oligoFile, inOligos);
1484 string type, oligo, roligo, group;
1485 bool hasPrimer = false; bool hasPairedBarcodes = false;
1487 int indexPrimer = 0;
1488 int indexBarcode = 0;
1489 int indexPairedPrimer = 0;
1490 int indexPairedBarcode = 0;
1491 set<string> uniquePrimers;
1492 set<string> uniqueBarcodes;
1494 while(!inOligos.eof()){
1498 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1501 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1502 m->gobble(inOligos);
1505 m->gobble(inOligos);
1506 //make type case insensitive
1507 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1511 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1513 for(int i=0;i<oligo.length();i++){
1514 oligo[i] = toupper(oligo[i]);
1515 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1518 if(type == "FORWARD"){
1521 // get rest of line in case there is a primer name
1522 while (!inOligos.eof()) {
1523 char c = inOligos.get();
1524 if (c == 10 || c == 13){ break; }
1525 else if (c == 32 || c == 9){;} //space or tab
1526 else { group += c; }
1529 //check for repeat barcodes
1530 map<string, int>::iterator itPrime = primers.find(oligo);
1531 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1533 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1535 primers[oligo]=indexPrimer; indexPrimer++;
1536 primerNameVector.push_back(group);
1538 else if (type == "PRIMER"){
1539 m->gobble(inOligos);
1543 for(int i=0;i<roligo.length();i++){
1544 roligo[i] = toupper(roligo[i]);
1545 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1547 roligo = reverseOligo(roligo);
1551 // get rest of line in case there is a primer name
1552 while (!inOligos.eof()) {
1553 char c = inOligos.get();
1554 if (c == 10 || c == 13){ break; }
1555 else if (c == 32 || c == 9){;} //space or tab
1556 else { group += c; }
1559 oligosPair newPrimer(oligo, roligo);
1561 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1563 //check for repeat barcodes
1564 string tempPair = oligo+roligo;
1565 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1566 else { uniquePrimers.insert(tempPair); }
1568 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1570 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1571 primerNameVector.push_back(group);
1574 else if(type == "REVERSE"){
1575 //Sequence oligoRC("reverse", oligo);
1576 //oligoRC.reverseComplement();
1577 string oligoRC = reverseOligo(oligo);
1578 revPrimer.push_back(oligoRC);
1580 else if(type == "BARCODE"){
1583 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1584 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1587 while (!inOligos.eof()) {
1588 char c = inOligos.get();
1589 if (c == 10 || c == 13){ break; }
1590 else if (c == 32 || c == 9){;} //space or tab
1594 //then this is illumina data with 4 columns
1596 hasPairedBarcodes = true;
1597 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1600 for(int i=0;i<reverseBarcode.length();i++){
1601 reverseBarcode[i] = toupper(reverseBarcode[i]);
1602 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1605 reverseBarcode = reverseOligo(reverseBarcode);
1606 oligosPair newPair(oligo, reverseBarcode);
1608 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1610 //check for repeat barcodes
1611 string tempPair = oligo+reverseBarcode;
1612 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1613 else { uniqueBarcodes.insert(tempPair); }
1615 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1616 barcodeNameVector.push_back(group);
1618 //check for repeat barcodes
1619 map<string, int>::iterator itBar = barcodes.find(oligo);
1620 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1622 barcodes[oligo]=indexBarcode; indexBarcode++;
1623 barcodeNameVector.push_back(group);
1625 }else if(type == "LINKER"){
1626 linker.push_back(oligo);
1627 }else if(type == "SPACER"){
1628 spacer.push_back(oligo);
1630 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1632 m->gobble(inOligos);
1636 if (hasPairedBarcodes || hasPrimer) {
1637 pairedOligos = true;
1638 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1641 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1643 //add in potential combos
1644 if(barcodeNameVector.size() == 0){
1646 barcodeNameVector.push_back("");
1649 if(primerNameVector.size() == 0){
1651 primerNameVector.push_back("");
1654 fastaFileNames.resize(barcodeNameVector.size());
1655 for(int i=0;i<fastaFileNames.size();i++){
1656 fastaFileNames[i].assign(primerNameVector.size(), "");
1658 if(qFileName != "") { qualFileNames = fastaFileNames; }
1659 if(nameFile != "") { nameFileNames = fastaFileNames; }
1662 set<string> uniqueNames; //used to cleanup outputFileNames
1664 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1665 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1667 string primerName = primerNameVector[itPrimer->first];
1668 string barcodeName = barcodeNameVector[itBar->first];
1670 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1672 string comboGroupName = "";
1673 string fastaFileName = "";
1674 string qualFileName = "";
1675 string nameFileName = "";
1676 string countFileName = "";
1678 if(primerName == ""){
1679 comboGroupName = barcodeNameVector[itBar->first];
1682 if(barcodeName == ""){
1683 comboGroupName = primerNameVector[itPrimer->first];
1686 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1692 map<string, string> variables;
1693 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1694 variables["[tag]"] = comboGroupName;
1695 fastaFileName = getOutputFileName("fasta", variables);
1696 if (uniqueNames.count(fastaFileName) == 0) {
1697 outputNames.push_back(fastaFileName);
1698 outputTypes["fasta"].push_back(fastaFileName);
1699 uniqueNames.insert(fastaFileName);
1702 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1703 m->openOutputFile(fastaFileName, temp); temp.close();
1705 if(qFileName != ""){
1706 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1707 qualFileName = getOutputFileName("qfile", variables);
1708 if (uniqueNames.count(qualFileName) == 0) {
1709 outputNames.push_back(qualFileName);
1710 outputTypes["qfile"].push_back(qualFileName);
1713 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1714 m->openOutputFile(qualFileName, temp); temp.close();
1718 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1719 nameFileName = getOutputFileName("name", variables);
1720 if (uniqueNames.count(nameFileName) == 0) {
1721 outputNames.push_back(nameFileName);
1722 outputTypes["name"].push_back(nameFileName);
1725 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1726 m->openOutputFile(nameFileName, temp); temp.close();
1732 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1733 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1735 string primerName = primerNameVector[itPrimer->second];
1736 string barcodeName = barcodeNameVector[itBar->second];
1738 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1740 string comboGroupName = "";
1741 string fastaFileName = "";
1742 string qualFileName = "";
1743 string nameFileName = "";
1744 string countFileName = "";
1746 if(primerName == ""){
1747 comboGroupName = barcodeNameVector[itBar->second];
1750 if(barcodeName == ""){
1751 comboGroupName = primerNameVector[itPrimer->second];
1754 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1760 map<string, string> variables;
1761 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1762 variables["[tag]"] = comboGroupName;
1763 fastaFileName = getOutputFileName("fasta", variables);
1764 if (uniqueNames.count(fastaFileName) == 0) {
1765 outputNames.push_back(fastaFileName);
1766 outputTypes["fasta"].push_back(fastaFileName);
1767 uniqueNames.insert(fastaFileName);
1770 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1771 m->openOutputFile(fastaFileName, temp); temp.close();
1773 if(qFileName != ""){
1774 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1775 qualFileName = getOutputFileName("qfile", variables);
1776 if (uniqueNames.count(qualFileName) == 0) {
1777 outputNames.push_back(qualFileName);
1778 outputTypes["qfile"].push_back(qualFileName);
1781 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1782 m->openOutputFile(qualFileName, temp); temp.close();
1786 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1787 nameFileName = getOutputFileName("name", variables);
1788 if (uniqueNames.count(nameFileName) == 0) {
1789 outputNames.push_back(nameFileName);
1790 outputTypes["name"].push_back(nameFileName);
1793 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1794 m->openOutputFile(nameFileName, temp); temp.close();
1801 numFPrimers = primers.size();
1802 if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
1803 numRPrimers = revPrimer.size();
1804 numLinkers = linker.size();
1805 numSpacers = spacer.size();
1807 bool allBlank = true;
1808 for (int i = 0; i < barcodeNameVector.size(); i++) {
1809 if (barcodeNameVector[i] != "") {
1814 for (int i = 0; i < primerNameVector.size(); i++) {
1815 if (primerNameVector[i] != "") {
1822 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1830 catch(exception& e) {
1831 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1835 //***************************************************************************************************************
1837 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1840 if(qscores.getName() != ""){
1841 qscores.trimQScores(-1, keepFirst);
1843 sequence.trim(keepFirst);
1846 catch(exception& e) {
1847 m->errorOut(e, "keepFirstTrim", "countDiffs");
1853 //***************************************************************************************************************
1855 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1859 int length = sequence.getNumBases() - removeLast;
1862 if(qscores.getName() != ""){
1863 qscores.trimQScores(-1, length);
1865 sequence.trim(length);
1874 catch(exception& e) {
1875 m->errorOut(e, "removeLastTrim", "countDiffs");
1881 //***************************************************************************************************************
1883 bool TrimSeqsCommand::cullLength(Sequence& seq){
1886 int length = seq.getNumBases();
1887 bool success = 0; //guilty until proven innocent
1889 if(length >= minLength && maxLength == 0) { success = 1; }
1890 else if(length >= minLength && length <= maxLength) { success = 1; }
1891 else { success = 0; }
1896 catch(exception& e) {
1897 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1903 //***************************************************************************************************************
1905 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1907 int longHomoP = seq.getLongHomoPolymer();
1908 bool success = 0; //guilty until proven innocent
1910 if(longHomoP <= maxHomoP){ success = 1; }
1911 else { success = 0; }
1915 catch(exception& e) {
1916 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1921 //********************************************************************/
1922 string TrimSeqsCommand::reverseOligo(string oligo){
1924 string reverse = "";
1926 for(int i=oligo.length()-1;i>=0;i--){
1928 if(oligo[i] == 'A') { reverse += 'T'; }
1929 else if(oligo[i] == 'T'){ reverse += 'A'; }
1930 else if(oligo[i] == 'U'){ reverse += 'A'; }
1932 else if(oligo[i] == 'G'){ reverse += 'C'; }
1933 else if(oligo[i] == 'C'){ reverse += 'G'; }
1935 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1936 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1938 else if(oligo[i] == 'M'){ reverse += 'K'; }
1939 else if(oligo[i] == 'K'){ reverse += 'M'; }
1941 else if(oligo[i] == 'W'){ reverse += 'W'; }
1942 else if(oligo[i] == 'S'){ reverse += 'S'; }
1944 else if(oligo[i] == 'B'){ reverse += 'V'; }
1945 else if(oligo[i] == 'V'){ reverse += 'B'; }
1947 else if(oligo[i] == 'D'){ reverse += 'H'; }
1948 else if(oligo[i] == 'H'){ reverse += 'D'; }
1950 else { reverse += 'N'; }
1956 catch(exception& e) {
1957 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1962 //***************************************************************************************************************
1964 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1966 int numNs = seq.getAmbigBases();
1967 bool success = 0; //guilty until proven innocent
1969 if(numNs <= maxAmbig) { success = 1; }
1970 else { success = 0; }
1974 catch(exception& e) {
1975 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1980 //***************************************************************************************************************