5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27 CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength);
28 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37 CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform);
38 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
39 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
40 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
41 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
42 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
43 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
44 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
45 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
46 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
47 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
48 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "TrimSeqsCommand", "setParameters");
59 //**********************************************************************************************************************
60 string TrimSeqsCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
64 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
65 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
66 helpString += "The fasta parameter is required.\n";
67 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
68 helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
69 helpString += "The oligos parameter allows you to provide an oligos file.\n";
70 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
71 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
72 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
73 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
74 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
75 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
76 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
77 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
78 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
79 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
80 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
81 helpString += "The qfile parameter allows you to provide a quality file.\n";
82 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
83 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
84 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
85 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
86 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
87 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
88 helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n";
89 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
90 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
91 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
92 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
93 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
94 helpString += "The trim.seqs command should be in the following format: \n";
95 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
96 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
97 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
99 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
102 catch(exception& e) {
103 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
107 //**********************************************************************************************************************
108 string TrimSeqsCommand::getOutputPattern(string type) {
112 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
113 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
114 else if (type == "group") { pattern = "[filename],groups"; }
115 else if (type == "name") { pattern = "[filename],[tag],names"; }
116 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
117 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
121 catch(exception& e) {
122 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
126 //**********************************************************************************************************************
128 TrimSeqsCommand::TrimSeqsCommand(){
130 abort = true; calledHelp = true;
132 vector<string> tempOutNames;
133 outputTypes["fasta"] = tempOutNames;
134 outputTypes["qfile"] = tempOutNames;
135 outputTypes["group"] = tempOutNames;
136 outputTypes["name"] = tempOutNames;
137 outputTypes["count"] = tempOutNames;
139 catch(exception& e) {
140 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
144 //***************************************************************************************************************
146 TrimSeqsCommand::TrimSeqsCommand(string option) {
149 abort = false; calledHelp = false;
152 //allow user to run help
153 if(option == "help") { help(); abort = true; calledHelp = true; }
154 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
157 vector<string> myArray = setParameters();
159 OptionParser parser(option);
160 map<string,string> parameters = parser.getParameters();
162 ValidParameters validParameter;
163 map<string,string>::iterator it;
165 //check to make sure all parameters are valid for command
166 for (it = parameters.begin(); it != parameters.end(); it++) {
167 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
170 //initialize outputTypes
171 vector<string> tempOutNames;
172 outputTypes["fasta"] = tempOutNames;
173 outputTypes["qfile"] = tempOutNames;
174 outputTypes["group"] = tempOutNames;
175 outputTypes["name"] = tempOutNames;
176 outputTypes["count"] = tempOutNames;
178 //if the user changes the input directory command factory will send this info to us in the output parameter
179 string inputDir = validParameter.validFile(parameters, "inputdir", false);
180 if (inputDir == "not found"){ inputDir = ""; }
183 it = parameters.find("fasta");
184 //user has given a template file
185 if(it != parameters.end()){
186 path = m->hasPath(it->second);
187 //if the user has not given a path then, add inputdir. else leave path alone.
188 if (path == "") { parameters["fasta"] = inputDir + it->second; }
191 it = parameters.find("oligos");
192 //user has given a template file
193 if(it != parameters.end()){
194 path = m->hasPath(it->second);
195 //if the user has not given a path then, add inputdir. else leave path alone.
196 if (path == "") { parameters["oligos"] = inputDir + it->second; }
199 it = parameters.find("qfile");
200 //user has given a template file
201 if(it != parameters.end()){
202 path = m->hasPath(it->second);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { parameters["qfile"] = inputDir + it->second; }
207 it = parameters.find("name");
208 //user has given a template file
209 if(it != parameters.end()){
210 path = m->hasPath(it->second);
211 //if the user has not given a path then, add inputdir. else leave path alone.
212 if (path == "") { parameters["name"] = inputDir + it->second; }
215 it = parameters.find("count");
216 //user has given a template file
217 if(it != parameters.end()){
218 path = m->hasPath(it->second);
219 //if the user has not given a path then, add inputdir. else leave path alone.
220 if (path == "") { parameters["count"] = inputDir + it->second; }
226 //check for required parameters
227 fastaFile = validParameter.validFile(parameters, "fasta", true);
228 if (fastaFile == "not found") {
229 fastaFile = m->getFastaFile();
230 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
231 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
232 }else if (fastaFile == "not open") { abort = true; }
233 else { m->setFastaFile(fastaFile); }
235 //if the user changes the output directory command factory will send this info to us in the output parameter
236 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
238 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
242 //check for optional parameter and set defaults
243 // ...at some point should added some additional type checking...
245 temp = validParameter.validFile(parameters, "flip", false);
246 if (temp == "not found") { flip = 0; }
247 else { flip = m->isTrue(temp); }
249 temp = validParameter.validFile(parameters, "oligos", true);
250 if (temp == "not found"){ oligoFile = ""; }
251 else if(temp == "not open"){ abort = true; }
252 else { oligoFile = temp; m->setOligosFile(oligoFile); }
255 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
256 m->mothurConvert(temp, maxAmbig);
258 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
259 m->mothurConvert(temp, maxHomoP);
261 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "1"; }
262 m->mothurConvert(temp, minLength);
264 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
265 m->mothurConvert(temp, maxLength);
267 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
268 m->mothurConvert(temp, bdiffs);
270 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
271 m->mothurConvert(temp, pdiffs);
273 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
274 m->mothurConvert(temp, ldiffs);
276 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
277 m->mothurConvert(temp, sdiffs);
279 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
280 m->mothurConvert(temp, tdiffs);
282 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
284 temp = validParameter.validFile(parameters, "qfile", true);
285 if (temp == "not found") { qFileName = ""; }
286 else if(temp == "not open") { abort = true; }
287 else { qFileName = temp; m->setQualFile(qFileName); }
289 temp = validParameter.validFile(parameters, "name", true);
290 if (temp == "not found") { nameFile = ""; }
291 else if(temp == "not open") { nameFile = ""; abort = true; }
292 else { nameFile = temp; m->setNameFile(nameFile); }
294 countfile = validParameter.validFile(parameters, "count", true);
295 if (countfile == "not open") { abort = true; countfile = ""; }
296 else if (countfile == "not found") { countfile = ""; }
297 else { m->setCountTableFile(countfile); }
299 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
301 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
302 m->mothurConvert(temp, qThreshold);
304 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
305 qtrim = m->isTrue(temp);
307 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
308 convert(temp, qRollAverage);
310 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
311 convert(temp, qWindowAverage);
313 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
314 convert(temp, qWindowSize);
316 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
317 convert(temp, qWindowStep);
319 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
320 convert(temp, qAverage);
322 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
323 convert(temp, keepFirst);
325 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
326 convert(temp, removeLast);
328 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
329 allFiles = m->isTrue(temp);
331 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
332 keepforward = m->isTrue(temp);
334 temp = validParameter.validFile(parameters, "logtransform", false); if (temp == "not found") { temp = "F"; }
335 logtransform = m->isTrue(temp);
337 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
338 reorient = m->isTrue(temp);
340 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
341 m->setProcessors(temp);
342 m->mothurConvert(temp, processors);
345 if(allFiles && (oligoFile == "")){
346 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
348 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
349 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
353 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
354 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
358 if (countfile == "") {
359 if (nameFile == "") {
360 vector<string> files; files.push_back(fastaFile);
361 parser.getNameFile(files);
367 catch(exception& e) {
368 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
372 //***************************************************************************************************************
374 int TrimSeqsCommand::execute(){
377 if (abort == true) { if (calledHelp) { return 0; } return 2; }
379 pairedOligos = false;
380 numFPrimers = 0; //this needs to be initialized
385 vector<vector<string> > fastaFileNames;
386 vector<vector<string> > qualFileNames;
387 vector<vector<string> > nameFileNames;
389 map<string, string> variables;
390 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
391 variables["[tag]"] = "trim";
392 string trimSeqFile = getOutputFileName("fasta",variables);
393 string trimQualFile = getOutputFileName("qfile",variables);
394 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
396 variables["[tag]"] = "scrap";
397 string scrapSeqFile = getOutputFileName("fasta",variables);
398 string scrapQualFile = getOutputFileName("qfile",variables);
399 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
401 if (qFileName != "") {
402 outputNames.push_back(trimQualFile);
403 outputNames.push_back(scrapQualFile);
404 outputTypes["qfile"].push_back(trimQualFile);
405 outputTypes["qfile"].push_back(scrapQualFile);
408 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
409 variables["[tag]"] = "trim";
410 string trimNameFile = getOutputFileName("name",variables);
411 variables["[tag]"] = "scrap";
412 string scrapNameFile = getOutputFileName("name",variables);
414 if (nameFile != "") {
415 m->readNames(nameFile, nameMap);
416 outputNames.push_back(trimNameFile);
417 outputNames.push_back(scrapNameFile);
418 outputTypes["name"].push_back(trimNameFile);
419 outputTypes["name"].push_back(scrapNameFile);
422 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
423 variables["[tag]"] = "trim";
424 string trimCountFile = getOutputFileName("count",variables);
425 variables["[tag]"] = "scrap";
426 string scrapCountFile = getOutputFileName("count",variables);
428 if (countfile != "") {
430 ct.readTable(countfile, true, false);
431 nameCount = ct.getNameMap();
432 outputNames.push_back(trimCountFile);
433 outputNames.push_back(scrapCountFile);
434 outputTypes["count"].push_back(trimCountFile);
435 outputTypes["count"].push_back(scrapCountFile);
439 if (m->control_pressed) { return 0; }
441 string outputGroupFileName;
443 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
444 if ((createGroup) && (countfile == "")){
445 map<string, string> myvariables;
446 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
447 outputGroupFileName = getOutputFileName("group",myvariables);
448 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
452 if (m->control_pressed) { return 0; }
454 //fills lines and qlines
455 setLines(fastaFile, qFileName);
458 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
460 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
464 if (m->control_pressed) { return 0; }
467 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
468 map<string, string>::iterator it;
469 set<string> namesToRemove;
470 for(int i=0;i<fastaFileNames.size();i++){
471 for(int j=0;j<fastaFileNames[0].size();j++){
472 if (fastaFileNames[i][j] != "") {
473 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
474 if(m->isBlank(fastaFileNames[i][j])){
475 m->mothurRemove(fastaFileNames[i][j]);
476 namesToRemove.insert(fastaFileNames[i][j]);
479 m->mothurRemove(qualFileNames[i][j]);
480 namesToRemove.insert(qualFileNames[i][j]);
484 m->mothurRemove(nameFileNames[i][j]);
485 namesToRemove.insert(nameFileNames[i][j]);
488 it = uniqueFastaNames.find(fastaFileNames[i][j]);
489 if (it == uniqueFastaNames.end()) {
490 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
498 //remove names for outputFileNames, just cleans up the output
499 vector<string> outputNames2;
500 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
501 outputNames = outputNames2;
503 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
505 m->openInputFile(it->first, in);
508 map<string, string> myvariables;
509 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
510 string thisGroupName = "";
511 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
512 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
513 m->openOutputFile(thisGroupName, out);
515 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
518 if (m->control_pressed) { break; }
520 Sequence currSeq(in); m->gobble(in);
521 if (countfile == "") {
522 out << currSeq.getName() << '\t' << it->second << endl;
524 if (nameFile != "") {
525 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
526 if (itName != nameMap.end()) {
527 vector<string> thisSeqsNames;
528 m->splitAtChar(itName->second, thisSeqsNames, ',');
529 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
530 out << thisSeqsNames[k] << '\t' << it->second << endl;
532 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
535 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
536 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
537 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
544 if (countfile != "") { //create countfile with group info included
545 CountTable* ct = new CountTable();
546 ct->readTable(trimCountFile, true, false);
547 map<string, int> justTrimmedNames = ct->getNameMap();
551 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
552 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
553 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
554 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
555 map<string, string>::iterator it2 = groupMap.find(itNames->first);
556 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
557 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
559 newCt.printTable(trimCountFile);
563 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
565 //output group counts
566 m->mothurOutEndLine();
568 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
569 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
570 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
572 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
574 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
576 //set fasta file as new current fastafile
578 itTypes = outputTypes.find("fasta");
579 if (itTypes != outputTypes.end()) {
580 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
583 itTypes = outputTypes.find("name");
584 if (itTypes != outputTypes.end()) {
585 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
588 itTypes = outputTypes.find("qfile");
589 if (itTypes != outputTypes.end()) {
590 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
593 itTypes = outputTypes.find("group");
594 if (itTypes != outputTypes.end()) {
595 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
598 itTypes = outputTypes.find("count");
599 if (itTypes != outputTypes.end()) {
600 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
603 m->mothurOutEndLine();
604 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
605 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
606 m->mothurOutEndLine();
611 catch(exception& e) {
612 m->errorOut(e, "TrimSeqsCommand", "execute");
617 /**************************************************************************************/
618 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
622 ofstream trimFASTAFile;
623 m->openOutputFile(trimFileName, trimFASTAFile);
625 ofstream scrapFASTAFile;
626 m->openOutputFile(scrapFileName, scrapFASTAFile);
628 ofstream trimQualFile;
629 ofstream scrapQualFile;
631 m->openOutputFile(trimQFileName, trimQualFile);
632 m->openOutputFile(scrapQFileName, scrapQualFile);
635 ofstream trimNameFile;
636 ofstream scrapNameFile;
638 m->openOutputFile(trimNFileName, trimNameFile);
639 m->openOutputFile(scrapNFileName, scrapNameFile);
642 ofstream trimCountFile;
643 ofstream scrapCountFile;
645 m->openOutputFile(trimCFileName, trimCountFile);
646 m->openOutputFile(scrapCFileName, scrapCountFile);
647 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
650 ofstream outGroupsFile;
651 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
653 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
654 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
655 if (fastaFileNames[i][j] != "") {
657 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
659 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
663 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
671 m->openInputFile(filename, inFASTA);
672 inFASTA.seekg(line.start);
675 if(qFileName != "") {
676 m->openInputFile(qFileName, qFile);
677 qFile.seekg(qline.start);
682 int numBarcodes = barcodes.size();
683 TrimOligos* trimOligos = NULL;
684 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
685 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
687 TrimOligos* rtrimOligos = NULL;
689 //create reoriented primer and barcode pairs
690 map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
691 for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
692 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
693 rpairedPrimers[it->first] = tempPair;
694 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
696 for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
697 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
698 rpairedBarcodes[it->first] = tempPair;
699 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
701 int index = rpairedBarcodes.size();
702 for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
703 oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
704 rpairedBarcodes[index] = tempPair; index++;
705 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
708 index = rpairedPrimers.size();
709 for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
710 oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
711 rpairedPrimers[index] = tempPair; index++;
712 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
715 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
720 if (m->control_pressed) {
721 delete trimOligos; if (reorient) { delete rtrimOligos; }
722 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
723 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
724 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
725 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
726 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
727 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
731 string trashCode = "";
732 int currentSeqsDiffs = 0;
734 Sequence currSeq(inFASTA); m->gobble(inFASTA);
735 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
736 Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
738 QualityScores currQual; QualityScores savedQual;
740 currQual = QualityScores(qFile); m->gobble(qFile);
741 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
742 //cout << currQual.getName() << endl;
745 string origSeq = currSeq.getUnaligned();
748 int barcodeIndex = 0;
752 success = trimOligos->stripLinker(currSeq, currQual);
753 if(success > ldiffs) { trashCode += 'k'; }
754 else{ currentSeqsDiffs += success; }
758 if(numBarcodes != 0){
759 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
760 if(success > bdiffs) {
763 else{ currentSeqsDiffs += success; }
765 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
767 success = trimOligos->stripSpacer(currSeq, currQual);
768 if(success > sdiffs) { trashCode += 's'; }
769 else{ currentSeqsDiffs += success; }
773 if(numFPrimers != 0){
774 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
775 if(success > pdiffs) {
778 else{ currentSeqsDiffs += success; }
781 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
783 if(numRPrimers != 0){
784 success = trimOligos->stripReverse(currSeq, currQual);
785 if(!success) { trashCode += 'r'; }
788 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
790 string thisTrashCode = "";
791 int thisCurrentSeqsDiffs = 0;
793 int thisBarcodeIndex = 0;
794 int thisPrimerIndex = 0;
795 //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
796 if(numBarcodes != 0){
797 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
798 if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
799 else{ thisCurrentSeqsDiffs += thisSuccess; }
801 //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
802 if(numFPrimers != 0){
803 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
804 if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
805 else{ thisCurrentSeqsDiffs += thisSuccess; }
808 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
810 if (thisTrashCode == "") {
811 trashCode = thisTrashCode;
812 success = thisSuccess;
813 currentSeqsDiffs = thisCurrentSeqsDiffs;
814 barcodeIndex = thisBarcodeIndex;
815 primerIndex = thisPrimerIndex;
816 savedSeq.reverseComplement();
817 currSeq.setAligned(savedSeq.getAligned());
819 savedQual.flipQScores();
820 currQual.setScores(savedQual.getScores());
822 }else { trashCode += "(" + thisTrashCode + ")"; }
826 success = keepFirstTrim(currSeq, currQual);
830 success = removeLastTrim(currSeq, currQual);
831 if(!success) { trashCode += 'l'; }
836 int origLength = currSeq.getNumBases();
838 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
839 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage, logtransform); }
840 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform); }
841 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform); }
842 else { success = 1; }
844 //you don't want to trim, if it fails above then scrap it
845 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
847 if(!success) { trashCode += 'q'; }
850 if(minLength > 0 || maxLength > 0){
851 success = cullLength(currSeq);
852 if(!success) { trashCode += 'l'; }
855 success = cullHomoP(currSeq);
856 if(!success) { trashCode += 'h'; }
859 success = cullAmbigs(currSeq);
860 if(!success) { trashCode += 'n'; }
863 if(flip){ // should go last
864 currSeq.reverseComplement();
866 currQual.flipQScores();
870 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
872 if(trashCode.length() == 0){
873 string thisGroup = "";
875 if(numBarcodes != 0){
876 thisGroup = barcodeNameVector[barcodeIndex];
877 if (numFPrimers != 0) {
878 if (primerNameVector[primerIndex] != "") {
879 if(thisGroup != "") {
880 thisGroup += "." + primerNameVector[primerIndex];
882 thisGroup = primerNameVector[primerIndex];
889 int pos = thisGroup.find("ignore");
890 if (pos == string::npos) {
891 currSeq.setAligned(currSeq.getUnaligned());
892 currSeq.printSequence(trimFASTAFile);
895 currQual.printQScores(trimQualFile);
900 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
901 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
902 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
905 int numRedundants = 0;
906 if (countfile != "") {
907 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
908 if (itCount != nameCount.end()) {
909 trimCountFile << itCount->first << '\t' << itCount->second << endl;
910 numRedundants = itCount->second-1;
911 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
915 if(numBarcodes != 0){
917 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
919 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
920 else { groupMap[currSeq.getName()] = thisGroup; }
922 if (nameFile != "") {
923 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
924 if (itName != nameMap.end()) {
925 vector<string> thisSeqsNames;
926 m->splitAtChar(itName->second, thisSeqsNames, ',');
927 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
928 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
929 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
931 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
934 map<string, int>::iterator it = groupCounts.find(thisGroup);
935 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
936 else { groupCounts[it->first] += (1 + numRedundants); }
943 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
944 currSeq.printSequence(output);
948 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
949 currQual.printQScores(output);
954 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
955 if (itName != nameMap.end()) {
956 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
957 output << itName->first << '\t' << itName->second << endl;
959 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
965 if(nameFile != ""){ //needs to be before the currSeq name is changed
966 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
967 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
968 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
970 if (countfile != "") {
971 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
972 if (itCount != nameCount.end()) {
973 trimCountFile << itCount->first << '\t' << itCount->second << endl;
974 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
977 currSeq.setName(currSeq.getName() + '|' + trashCode);
978 currSeq.setUnaligned(origSeq);
979 currSeq.setAligned(origSeq);
980 currSeq.printSequence(scrapFASTAFile);
982 currQual.printQScores(scrapQualFile);
988 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
989 unsigned long long pos = inFASTA.tellg();
990 if ((pos == -1) || (pos >= line.end)) { break; }
993 if (inFASTA.eof()) { break; }
997 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1001 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
1004 if (reorient) { delete rtrimOligos; }
1006 trimFASTAFile.close();
1007 scrapFASTAFile.close();
1008 if (createGroup) { outGroupsFile.close(); }
1009 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
1010 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
1011 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
1015 catch(exception& e) {
1016 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
1021 /**************************************************************************************************/
1023 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
1027 int exitCommand = 1;
1030 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1031 //loop through and create all the processes you want
1032 while (process != processors) {
1036 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1038 }else if (pid == 0){
1040 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1041 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1042 vector<vector<string> > tempNameFileNames = nameFileNames;
1047 for(int i=0;i<tempFASTAFileNames.size();i++){
1048 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1049 if (tempFASTAFileNames[i][j] != "") {
1050 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1051 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1053 if(qFileName != ""){
1054 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1055 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1058 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1059 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1066 driverCreateTrim(filename,
1068 (trimFASTAFileName + toString(getpid()) + ".temp"),
1069 (scrapFASTAFileName + toString(getpid()) + ".temp"),
1070 (trimQualFileName + toString(getpid()) + ".temp"),
1071 (scrapQualFileName + toString(getpid()) + ".temp"),
1072 (trimNameFileName + toString(getpid()) + ".temp"),
1073 (scrapNameFileName + toString(getpid()) + ".temp"),
1074 (trimCountFileName + toString(getpid()) + ".temp"),
1075 (scrapCountFileName + toString(getpid()) + ".temp"),
1076 (groupFile + toString(getpid()) + ".temp"),
1078 tempPrimerQualFileNames,
1083 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1085 //pass groupCounts to parent
1088 string tempFile = filename + toString(getpid()) + ".num.temp";
1089 m->openOutputFile(tempFile, out);
1091 out << groupCounts.size() << endl;
1093 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1094 out << it->first << '\t' << it->second << endl;
1097 out << groupMap.size() << endl;
1098 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1099 out << it->first << '\t' << it->second << endl;
1105 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1106 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1113 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1114 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1115 if(qFileName != ""){
1116 m->openOutputFile(trimQualFileName, temp); temp.close();
1117 m->openOutputFile(scrapQualFileName, temp); temp.close();
1119 if (nameFile != "") {
1120 m->openOutputFile(trimNameFileName, temp); temp.close();
1121 m->openOutputFile(scrapNameFileName, temp); temp.close();
1123 if (countfile != "") {
1124 m->openOutputFile(trimCountFileName, temp); temp.close();
1125 m->openOutputFile(scrapCountFileName, temp); temp.close();
1128 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1130 //force parent to wait until all the processes are done
1131 for (int i=0;i<processIDS.size();i++) {
1132 int temp = processIDS[i];
1136 //////////////////////////////////////////////////////////////////////////////////////////////////////
1137 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1138 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1139 //////////////////////////////////////////////////////////////////////////////////////////////////////
1141 vector<trimData*> pDataArray;
1142 DWORD dwThreadIdArray[processors-1];
1143 HANDLE hThreadArray[processors-1];
1145 //Create processor worker threads.
1146 for( int h=0; h<processors-1; h++){
1148 string extension = "";
1149 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1150 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1151 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1152 vector<vector<string> > tempNameFileNames = nameFileNames;
1157 for(int i=0;i<tempFASTAFileNames.size();i++){
1158 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1159 if (tempFASTAFileNames[i][j] != "") {
1160 tempFASTAFileNames[i][j] += extension;
1161 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1163 if(qFileName != ""){
1164 tempPrimerQualFileNames[i][j] += extension;
1165 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1168 tempNameFileNames[i][j] += extension;
1169 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1177 trimData* tempTrim = new trimData(filename,
1178 qFileName, nameFile, countfile,
1179 (trimFASTAFileName+extension),
1180 (scrapFASTAFileName+extension),
1181 (trimQualFileName+extension),
1182 (scrapQualFileName+extension),
1183 (trimNameFileName+extension),
1184 (scrapNameFileName+extension),
1185 (trimCountFileName+extension),
1186 (scrapCountFileName+extension),
1187 (groupFile+extension),
1189 tempPrimerQualFileNames,
1191 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1192 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1193 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1194 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform,
1195 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1196 pDataArray.push_back(tempTrim);
1198 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1203 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1204 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1205 if(qFileName != ""){
1206 m->openOutputFile(trimQualFileName, temp); temp.close();
1207 m->openOutputFile(scrapQualFileName, temp); temp.close();
1209 if (nameFile != "") {
1210 m->openOutputFile(trimNameFileName, temp); temp.close();
1211 m->openOutputFile(scrapNameFileName, temp); temp.close();
1213 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1214 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1215 vector<vector<string> > tempNameFileNames = nameFileNames;
1218 string extension = toString(processors-1) + ".temp";
1219 for(int i=0;i<tempFASTAFileNames.size();i++){
1220 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1221 if (tempFASTAFileNames[i][j] != "") {
1222 tempFASTAFileNames[i][j] += extension;
1223 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1225 if(qFileName != ""){
1226 tempPrimerQualFileNames[i][j] += extension;
1227 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1230 tempNameFileNames[i][j] += extension;
1231 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1238 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1239 processIDS.push_back(processors-1);
1242 //Wait until all threads have terminated.
1243 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1245 //Close all thread handles and free memory allocations.
1246 for(int i=0; i < pDataArray.size(); i++){
1247 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1248 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1250 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1251 map<string, int>::iterator it2 = groupCounts.find(it->first);
1252 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1253 else { groupCounts[it->first] += it->second; }
1255 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1256 map<string, string>::iterator it2 = groupMap.find(it->first);
1257 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1258 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1260 CloseHandle(hThreadArray[i]);
1261 delete pDataArray[i];
1268 for(int i=0;i<processIDS.size();i++){
1270 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1272 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1273 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1274 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1275 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1277 if(qFileName != ""){
1278 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1279 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1280 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1281 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1285 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1286 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1287 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1288 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1291 if(countfile != ""){
1292 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1293 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1294 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1295 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1298 if((createGroup)&&(countfile == "")){
1299 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1300 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1305 for(int j=0;j<fastaFileNames.size();j++){
1306 for(int k=0;k<fastaFileNames[j].size();k++){
1307 if (fastaFileNames[j][k] != "") {
1308 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1309 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1311 if(qFileName != ""){
1312 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1313 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1317 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1318 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1325 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1328 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1329 m->openInputFile(tempFile, in);
1333 in >> tempNum; m->gobble(in);
1336 for (int i = 0; i < tempNum; i++) {
1338 in >> group >> groupNum; m->gobble(in);
1340 map<string, int>::iterator it = groupCounts.find(group);
1341 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1342 else { groupCounts[it->first] += groupNum; }
1345 in >> tempNum; m->gobble(in);
1347 for (int i = 0; i < tempNum; i++) {
1348 string group, seqName;
1349 in >> seqName >> group; m->gobble(in);
1351 map<string, string>::iterator it = groupMap.find(seqName);
1352 if (it == groupMap.end()) { groupMap[seqName] = group; }
1353 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1357 in.close(); m->mothurRemove(tempFile);
1364 catch(exception& e) {
1365 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1370 /**************************************************************************************************/
1372 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1375 vector<unsigned long long> fastaFilePos;
1376 vector<unsigned long long> qfileFilePos;
1378 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1379 //set file positions for fasta file
1380 fastaFilePos = m->divideFile(filename, processors);
1382 //get name of first sequence in each chunk
1383 map<string, int> firstSeqNames;
1384 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1386 m->openInputFile(filename, in);
1387 in.seekg(fastaFilePos[i]);
1390 firstSeqNames[temp.getName()] = i;
1395 if(qfilename != "") {
1396 //seach for filePos of each first name in the qfile and save in qfileFilePos
1398 m->openInputFile(qfilename, inQual);
1401 while(!inQual.eof()){
1402 input = m->getline(inQual);
1404 if (input.length() != 0) {
1405 if(input[0] == '>'){ //this is a sequence name line
1406 istringstream nameStream(input);
1408 string sname = ""; nameStream >> sname;
1409 sname = sname.substr(1);
1411 m->checkName(sname);
1413 map<string, int>::iterator it = firstSeqNames.find(sname);
1415 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1416 unsigned long long pos = inQual.tellg();
1417 qfileFilePos.push_back(pos - input.length() - 1);
1418 firstSeqNames.erase(it);
1423 if (firstSeqNames.size() == 0) { break; }
1428 if (firstSeqNames.size() != 0) {
1429 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1430 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1436 //get last file position of qfile
1438 unsigned long long size;
1440 //get num bytes in file
1441 pFile = fopen (qfilename.c_str(),"rb");
1442 if (pFile==NULL) perror ("Error opening file");
1444 fseek (pFile, 0, SEEK_END);
1449 qfileFilePos.push_back(size);
1452 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1453 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1454 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1455 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1457 if(qfilename == "") { qLines = lines; } //files with duds
1463 if (processors == 1) { //save time
1464 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1465 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1466 lines.push_back(linePair(0, 1000));
1467 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1469 int numFastaSeqs = 0;
1470 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1471 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1473 if (qfilename != "") {
1474 int numQualSeqs = 0;
1475 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1477 if (numFastaSeqs != numQualSeqs) {
1478 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1482 //figure out how many sequences you have to process
1483 int numSeqsPerProcessor = numFastaSeqs / processors;
1484 for (int i = 0; i < processors; i++) {
1485 int startIndex = i * numSeqsPerProcessor;
1486 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1487 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1488 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1491 if(qfilename == "") { qLines = lines; } //files with duds
1496 catch(exception& e) {
1497 m->errorOut(e, "TrimSeqsCommand", "setLines");
1502 //***************************************************************************************************************
1504 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1507 m->openInputFile(oligoFile, inOligos);
1511 string type, oligo, roligo, group;
1512 bool hasPrimer = false; bool hasPairedBarcodes = false;
1514 int indexPrimer = 0;
1515 int indexBarcode = 0;
1516 int indexPairedPrimer = 0;
1517 int indexPairedBarcode = 0;
1518 set<string> uniquePrimers;
1519 set<string> uniqueBarcodes;
1521 while(!inOligos.eof()){
1525 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1528 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1529 m->gobble(inOligos);
1532 m->gobble(inOligos);
1533 //make type case insensitive
1534 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1538 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1540 for(int i=0;i<oligo.length();i++){
1541 oligo[i] = toupper(oligo[i]);
1542 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1545 if(type == "FORWARD"){
1548 // get rest of line in case there is a primer name
1549 while (!inOligos.eof()) {
1550 char c = inOligos.get();
1551 if (c == 10 || c == 13 || c == -1){ break; }
1552 else if (c == 32 || c == 9){;} //space or tab
1553 else { group += c; }
1556 //check for repeat barcodes
1557 map<string, int>::iterator itPrime = primers.find(oligo);
1558 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1560 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1562 primers[oligo]=indexPrimer; indexPrimer++;
1563 primerNameVector.push_back(group);
1565 else if (type == "PRIMER"){
1566 m->gobble(inOligos);
1570 for(int i=0;i<roligo.length();i++){
1571 roligo[i] = toupper(roligo[i]);
1572 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1574 roligo = reverseOligo(roligo);
1578 // get rest of line in case there is a primer name
1579 while (!inOligos.eof()) {
1580 char c = inOligos.get();
1581 if (c == 10 || c == 13 || c == -1){ break; }
1582 else if (c == 32 || c == 9){;} //space or tab
1583 else { group += c; }
1586 oligosPair newPrimer(oligo, roligo);
1588 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1590 //check for repeat barcodes
1591 string tempPair = oligo+roligo;
1592 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1593 else { uniquePrimers.insert(tempPair); }
1595 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1597 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1598 primerNameVector.push_back(group);
1601 else if(type == "REVERSE"){
1602 //Sequence oligoRC("reverse", oligo);
1603 //oligoRC.reverseComplement();
1604 string oligoRC = reverseOligo(oligo);
1605 revPrimer.push_back(oligoRC);
1607 else if(type == "BARCODE"){
1610 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1611 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1614 while (!inOligos.eof()) {
1615 char c = inOligos.get();
1616 if (c == 10 || c == 13 || c == -1){ break; }
1617 else if (c == 32 || c == 9){;} //space or tab
1621 //then this is illumina data with 4 columns
1623 hasPairedBarcodes = true;
1624 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1627 for(int i=0;i<reverseBarcode.length();i++){
1628 reverseBarcode[i] = toupper(reverseBarcode[i]);
1629 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1632 reverseBarcode = reverseOligo(reverseBarcode);
1633 oligosPair newPair(oligo, reverseBarcode);
1635 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1637 //check for repeat barcodes
1638 string tempPair = oligo+reverseBarcode;
1639 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1640 else { uniqueBarcodes.insert(tempPair); }
1642 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1643 barcodeNameVector.push_back(group);
1645 //check for repeat barcodes
1646 map<string, int>::iterator itBar = barcodes.find(oligo);
1647 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1649 barcodes[oligo]=indexBarcode; indexBarcode++;
1650 barcodeNameVector.push_back(group);
1652 }else if(type == "LINKER"){
1653 linker.push_back(oligo);
1654 }else if(type == "SPACER"){
1655 spacer.push_back(oligo);
1657 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1659 m->gobble(inOligos);
1663 if (hasPairedBarcodes || hasPrimer) {
1664 pairedOligos = true;
1665 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1668 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1670 //add in potential combos
1671 if(barcodeNameVector.size() == 0){
1673 barcodeNameVector.push_back("");
1676 if(primerNameVector.size() == 0){
1678 primerNameVector.push_back("");
1681 fastaFileNames.resize(barcodeNameVector.size());
1682 for(int i=0;i<fastaFileNames.size();i++){
1683 fastaFileNames[i].assign(primerNameVector.size(), "");
1685 if(qFileName != "") { qualFileNames = fastaFileNames; }
1686 if(nameFile != "") { nameFileNames = fastaFileNames; }
1689 set<string> uniqueNames; //used to cleanup outputFileNames
1691 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1692 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1694 string primerName = primerNameVector[itPrimer->first];
1695 string barcodeName = barcodeNameVector[itBar->first];
1697 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1699 string comboGroupName = "";
1700 string fastaFileName = "";
1701 string qualFileName = "";
1702 string nameFileName = "";
1703 string countFileName = "";
1705 if(primerName == ""){
1706 comboGroupName = barcodeNameVector[itBar->first];
1709 if(barcodeName == ""){
1710 comboGroupName = primerNameVector[itPrimer->first];
1713 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1719 map<string, string> variables;
1720 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1721 variables["[tag]"] = comboGroupName;
1722 fastaFileName = getOutputFileName("fasta", variables);
1723 if (uniqueNames.count(fastaFileName) == 0) {
1724 outputNames.push_back(fastaFileName);
1725 outputTypes["fasta"].push_back(fastaFileName);
1726 uniqueNames.insert(fastaFileName);
1729 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1730 m->openOutputFile(fastaFileName, temp); temp.close();
1732 if(qFileName != ""){
1733 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1734 qualFileName = getOutputFileName("qfile", variables);
1735 if (uniqueNames.count(qualFileName) == 0) {
1736 outputNames.push_back(qualFileName);
1737 outputTypes["qfile"].push_back(qualFileName);
1740 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1741 m->openOutputFile(qualFileName, temp); temp.close();
1745 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1746 nameFileName = getOutputFileName("name", variables);
1747 if (uniqueNames.count(nameFileName) == 0) {
1748 outputNames.push_back(nameFileName);
1749 outputTypes["name"].push_back(nameFileName);
1752 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1753 m->openOutputFile(nameFileName, temp); temp.close();
1759 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1760 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1762 string primerName = primerNameVector[itPrimer->second];
1763 string barcodeName = barcodeNameVector[itBar->second];
1765 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1767 string comboGroupName = "";
1768 string fastaFileName = "";
1769 string qualFileName = "";
1770 string nameFileName = "";
1771 string countFileName = "";
1773 if(primerName == ""){
1774 comboGroupName = barcodeNameVector[itBar->second];
1777 if(barcodeName == ""){
1778 comboGroupName = primerNameVector[itPrimer->second];
1781 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1787 map<string, string> variables;
1788 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1789 variables["[tag]"] = comboGroupName;
1790 fastaFileName = getOutputFileName("fasta", variables);
1791 if (uniqueNames.count(fastaFileName) == 0) {
1792 outputNames.push_back(fastaFileName);
1793 outputTypes["fasta"].push_back(fastaFileName);
1794 uniqueNames.insert(fastaFileName);
1797 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1798 m->openOutputFile(fastaFileName, temp); temp.close();
1800 if(qFileName != ""){
1801 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1802 qualFileName = getOutputFileName("qfile", variables);
1803 if (uniqueNames.count(qualFileName) == 0) {
1804 outputNames.push_back(qualFileName);
1805 outputTypes["qfile"].push_back(qualFileName);
1808 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1809 m->openOutputFile(qualFileName, temp); temp.close();
1813 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1814 nameFileName = getOutputFileName("name", variables);
1815 if (uniqueNames.count(nameFileName) == 0) {
1816 outputNames.push_back(nameFileName);
1817 outputTypes["name"].push_back(nameFileName);
1820 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1821 m->openOutputFile(nameFileName, temp); temp.close();
1828 numFPrimers = primers.size();
1829 if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
1830 numRPrimers = revPrimer.size();
1831 numLinkers = linker.size();
1832 numSpacers = spacer.size();
1834 bool allBlank = true;
1835 for (int i = 0; i < barcodeNameVector.size(); i++) {
1836 if (barcodeNameVector[i] != "") {
1841 for (int i = 0; i < primerNameVector.size(); i++) {
1842 if (primerNameVector[i] != "") {
1849 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1857 catch(exception& e) {
1858 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1862 //***************************************************************************************************************
1864 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1867 if(qscores.getName() != ""){
1868 qscores.trimQScores(-1, keepFirst);
1871 // sequence.printSequence(cout);cout << endl;
1873 sequence.trim(keepFirst);
1875 // sequence.printSequence(cout);cout << endl << endl;;
1879 catch(exception& e) {
1880 m->errorOut(e, "keepFirstTrim", "countDiffs");
1886 //***************************************************************************************************************
1888 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1892 int length = sequence.getNumBases() - removeLast;
1895 if(qscores.getName() != ""){
1896 qscores.trimQScores(-1, length);
1898 sequence.trim(length);
1907 catch(exception& e) {
1908 m->errorOut(e, "removeLastTrim", "countDiffs");
1914 //***************************************************************************************************************
1916 bool TrimSeqsCommand::cullLength(Sequence& seq){
1919 int length = seq.getNumBases();
1920 bool success = 0; //guilty until proven innocent
1922 if(length >= minLength && maxLength == 0) { success = 1; }
1923 else if(length >= minLength && length <= maxLength) { success = 1; }
1924 else { success = 0; }
1929 catch(exception& e) {
1930 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1936 //***************************************************************************************************************
1938 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1940 int longHomoP = seq.getLongHomoPolymer();
1941 bool success = 0; //guilty until proven innocent
1943 if(longHomoP <= maxHomoP){ success = 1; }
1944 else { success = 0; }
1948 catch(exception& e) {
1949 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1954 //********************************************************************/
1955 string TrimSeqsCommand::reverseOligo(string oligo){
1957 string reverse = "";
1959 for(int i=oligo.length()-1;i>=0;i--){
1961 if(oligo[i] == 'A') { reverse += 'T'; }
1962 else if(oligo[i] == 'T'){ reverse += 'A'; }
1963 else if(oligo[i] == 'U'){ reverse += 'A'; }
1965 else if(oligo[i] == 'G'){ reverse += 'C'; }
1966 else if(oligo[i] == 'C'){ reverse += 'G'; }
1968 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1969 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1971 else if(oligo[i] == 'M'){ reverse += 'K'; }
1972 else if(oligo[i] == 'K'){ reverse += 'M'; }
1974 else if(oligo[i] == 'W'){ reverse += 'W'; }
1975 else if(oligo[i] == 'S'){ reverse += 'S'; }
1977 else if(oligo[i] == 'B'){ reverse += 'V'; }
1978 else if(oligo[i] == 'V'){ reverse += 'B'; }
1980 else if(oligo[i] == 'D'){ reverse += 'H'; }
1981 else if(oligo[i] == 'H'){ reverse += 'D'; }
1983 else { reverse += 'N'; }
1989 catch(exception& e) {
1990 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1995 //***************************************************************************************************************
1997 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1999 int numNs = seq.getAmbigBases();
2000 bool success = 0; //guilty until proven innocent
2002 if(numNs <= maxAmbig) { success = 1; }
2003 else { success = 0; }
2007 catch(exception& e) {
2008 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
2013 //***************************************************************************************************************