5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "TrimSeqsCommand", "setParameters");
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65 helpString += "The fasta parameter is required.\n";
66 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67 helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80 helpString += "The qfile parameter allows you to provide a quality file.\n";
81 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92 helpString += "The trim.seqs command should be in the following format: \n";
93 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
94 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
95 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
100 catch(exception& e) {
101 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
110 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
111 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
112 else if (type == "group") { pattern = "[filename],groups"; }
113 else if (type == "name") { pattern = "[filename],[tag],names"; }
114 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
124 //**********************************************************************************************************************
126 TrimSeqsCommand::TrimSeqsCommand(){
128 abort = true; calledHelp = true;
130 vector<string> tempOutNames;
131 outputTypes["fasta"] = tempOutNames;
132 outputTypes["qfile"] = tempOutNames;
133 outputTypes["group"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["count"] = tempOutNames;
137 catch(exception& e) {
138 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
142 //***************************************************************************************************************
144 TrimSeqsCommand::TrimSeqsCommand(string option) {
147 abort = false; calledHelp = false;
150 //allow user to run help
151 if(option == "help") { help(); abort = true; calledHelp = true; }
152 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
155 vector<string> myArray = setParameters();
157 OptionParser parser(option);
158 map<string,string> parameters = parser.getParameters();
160 ValidParameters validParameter;
161 map<string,string>::iterator it;
163 //check to make sure all parameters are valid for command
164 for (it = parameters.begin(); it != parameters.end(); it++) {
165 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
168 //initialize outputTypes
169 vector<string> tempOutNames;
170 outputTypes["fasta"] = tempOutNames;
171 outputTypes["qfile"] = tempOutNames;
172 outputTypes["group"] = tempOutNames;
173 outputTypes["name"] = tempOutNames;
174 outputTypes["count"] = tempOutNames;
176 //if the user changes the input directory command factory will send this info to us in the output parameter
177 string inputDir = validParameter.validFile(parameters, "inputdir", false);
178 if (inputDir == "not found"){ inputDir = ""; }
181 it = parameters.find("fasta");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["fasta"] = inputDir + it->second; }
189 it = parameters.find("oligos");
190 //user has given a template file
191 if(it != parameters.end()){
192 path = m->hasPath(it->second);
193 //if the user has not given a path then, add inputdir. else leave path alone.
194 if (path == "") { parameters["oligos"] = inputDir + it->second; }
197 it = parameters.find("qfile");
198 //user has given a template file
199 if(it != parameters.end()){
200 path = m->hasPath(it->second);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { parameters["qfile"] = inputDir + it->second; }
205 it = parameters.find("name");
206 //user has given a template file
207 if(it != parameters.end()){
208 path = m->hasPath(it->second);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { parameters["name"] = inputDir + it->second; }
213 it = parameters.find("count");
214 //user has given a template file
215 if(it != parameters.end()){
216 path = m->hasPath(it->second);
217 //if the user has not given a path then, add inputdir. else leave path alone.
218 if (path == "") { parameters["count"] = inputDir + it->second; }
224 //check for required parameters
225 fastaFile = validParameter.validFile(parameters, "fasta", true);
226 if (fastaFile == "not found") {
227 fastaFile = m->getFastaFile();
228 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230 }else if (fastaFile == "not open") { abort = true; }
231 else { m->setFastaFile(fastaFile); }
233 //if the user changes the output directory command factory will send this info to us in the output parameter
234 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
236 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
240 //check for optional parameter and set defaults
241 // ...at some point should added some additional type checking...
243 temp = validParameter.validFile(parameters, "flip", false);
244 if (temp == "not found") { flip = 0; }
245 else { flip = m->isTrue(temp); }
247 temp = validParameter.validFile(parameters, "oligos", true);
248 if (temp == "not found"){ oligoFile = ""; }
249 else if(temp == "not open"){ abort = true; }
250 else { oligoFile = temp; m->setOligosFile(oligoFile); }
253 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
254 m->mothurConvert(temp, maxAmbig);
256 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
257 m->mothurConvert(temp, maxHomoP);
259 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
260 m->mothurConvert(temp, minLength);
262 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
263 m->mothurConvert(temp, maxLength);
265 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
266 m->mothurConvert(temp, bdiffs);
268 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
269 m->mothurConvert(temp, pdiffs);
271 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
272 m->mothurConvert(temp, ldiffs);
274 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
275 m->mothurConvert(temp, sdiffs);
277 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
278 m->mothurConvert(temp, tdiffs);
280 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
282 temp = validParameter.validFile(parameters, "qfile", true);
283 if (temp == "not found") { qFileName = ""; }
284 else if(temp == "not open") { abort = true; }
285 else { qFileName = temp; m->setQualFile(qFileName); }
287 temp = validParameter.validFile(parameters, "name", true);
288 if (temp == "not found") { nameFile = ""; }
289 else if(temp == "not open") { nameFile = ""; abort = true; }
290 else { nameFile = temp; m->setNameFile(nameFile); }
292 countfile = validParameter.validFile(parameters, "count", true);
293 if (countfile == "not open") { abort = true; countfile = ""; }
294 else if (countfile == "not found") { countfile = ""; }
295 else { m->setCountTableFile(countfile); }
297 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
299 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
300 m->mothurConvert(temp, qThreshold);
302 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
303 qtrim = m->isTrue(temp);
305 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
306 convert(temp, qRollAverage);
308 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309 convert(temp, qWindowAverage);
311 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
312 convert(temp, qWindowSize);
314 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
315 convert(temp, qWindowStep);
317 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
318 convert(temp, qAverage);
320 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
321 convert(temp, keepFirst);
323 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
324 convert(temp, removeLast);
326 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
327 allFiles = m->isTrue(temp);
329 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
330 keepforward = m->isTrue(temp);
332 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
333 reorient = m->isTrue(temp);
335 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
336 m->setProcessors(temp);
337 m->mothurConvert(temp, processors);
340 if(allFiles && (oligoFile == "")){
341 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
343 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
348 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
349 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
353 if (countfile == "") {
354 if (nameFile == "") {
355 vector<string> files; files.push_back(fastaFile);
356 parser.getNameFile(files);
362 catch(exception& e) {
363 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
367 //***************************************************************************************************************
369 int TrimSeqsCommand::execute(){
372 if (abort == true) { if (calledHelp) { return 0; } return 2; }
374 pairedOligos = false;
375 numFPrimers = 0; //this needs to be initialized
380 vector<vector<string> > fastaFileNames;
381 vector<vector<string> > qualFileNames;
382 vector<vector<string> > nameFileNames;
384 map<string, string> variables;
385 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386 variables["[tag]"] = "trim";
387 string trimSeqFile = getOutputFileName("fasta",variables);
388 string trimQualFile = getOutputFileName("qfile",variables);
389 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
391 variables["[tag]"] = "scrap";
392 string scrapSeqFile = getOutputFileName("fasta",variables);
393 string scrapQualFile = getOutputFileName("qfile",variables);
394 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
396 if (qFileName != "") {
397 outputNames.push_back(trimQualFile);
398 outputNames.push_back(scrapQualFile);
399 outputTypes["qfile"].push_back(trimQualFile);
400 outputTypes["qfile"].push_back(scrapQualFile);
403 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404 variables["[tag]"] = "trim";
405 string trimNameFile = getOutputFileName("name",variables);
406 variables["[tag]"] = "scrap";
407 string scrapNameFile = getOutputFileName("name",variables);
409 if (nameFile != "") {
410 m->readNames(nameFile, nameMap);
411 outputNames.push_back(trimNameFile);
412 outputNames.push_back(scrapNameFile);
413 outputTypes["name"].push_back(trimNameFile);
414 outputTypes["name"].push_back(scrapNameFile);
417 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418 variables["[tag]"] = "trim";
419 string trimCountFile = getOutputFileName("count",variables);
420 variables["[tag]"] = "scrap";
421 string scrapCountFile = getOutputFileName("count",variables);
423 if (countfile != "") {
425 ct.readTable(countfile, true);
426 nameCount = ct.getNameMap();
427 outputNames.push_back(trimCountFile);
428 outputNames.push_back(scrapCountFile);
429 outputTypes["count"].push_back(trimCountFile);
430 outputTypes["count"].push_back(scrapCountFile);
434 if (m->control_pressed) { return 0; }
436 string outputGroupFileName;
438 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439 if ((createGroup) && (countfile == "")){
440 map<string, string> myvariables;
441 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442 outputGroupFileName = getOutputFileName("group",myvariables);
443 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
447 if (!pairedOligos) { if (reorient) { m->mothurOut("[WARNING]: You cannot use reorient without paired barcodes or primers, skipping."); m->mothurOutEndLine(); reorient = false; } }
449 if (m->control_pressed) { return 0; }
451 //fills lines and qlines
452 setLines(fastaFile, qFileName);
455 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
457 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
461 if (m->control_pressed) { return 0; }
464 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
465 map<string, string>::iterator it;
466 set<string> namesToRemove;
467 for(int i=0;i<fastaFileNames.size();i++){
468 for(int j=0;j<fastaFileNames[0].size();j++){
469 if (fastaFileNames[i][j] != "") {
470 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
471 if(m->isBlank(fastaFileNames[i][j])){
472 m->mothurRemove(fastaFileNames[i][j]);
473 namesToRemove.insert(fastaFileNames[i][j]);
476 m->mothurRemove(qualFileNames[i][j]);
477 namesToRemove.insert(qualFileNames[i][j]);
481 m->mothurRemove(nameFileNames[i][j]);
482 namesToRemove.insert(nameFileNames[i][j]);
485 it = uniqueFastaNames.find(fastaFileNames[i][j]);
486 if (it == uniqueFastaNames.end()) {
487 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
495 //remove names for outputFileNames, just cleans up the output
496 vector<string> outputNames2;
497 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498 outputNames = outputNames2;
500 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
502 m->openInputFile(it->first, in);
505 map<string, string> myvariables;
506 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
507 string thisGroupName = "";
508 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
509 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
510 m->openOutputFile(thisGroupName, out);
512 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
515 if (m->control_pressed) { break; }
517 Sequence currSeq(in); m->gobble(in);
518 if (countfile == "") {
519 out << currSeq.getName() << '\t' << it->second << endl;
521 if (nameFile != "") {
522 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
523 if (itName != nameMap.end()) {
524 vector<string> thisSeqsNames;
525 m->splitAtChar(itName->second, thisSeqsNames, ',');
526 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
527 out << thisSeqsNames[k] << '\t' << it->second << endl;
529 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
532 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
533 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
534 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
541 if (countfile != "") { //create countfile with group info included
542 CountTable* ct = new CountTable();
543 ct->readTable(trimCountFile, true);
544 map<string, int> justTrimmedNames = ct->getNameMap();
548 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
549 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
550 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
551 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
552 map<string, string>::iterator it2 = groupMap.find(itNames->first);
553 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
554 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
556 newCt.printTable(trimCountFile);
560 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
562 //output group counts
563 m->mothurOutEndLine();
565 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
566 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
567 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
569 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
571 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
573 //set fasta file as new current fastafile
575 itTypes = outputTypes.find("fasta");
576 if (itTypes != outputTypes.end()) {
577 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
580 itTypes = outputTypes.find("name");
581 if (itTypes != outputTypes.end()) {
582 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
585 itTypes = outputTypes.find("qfile");
586 if (itTypes != outputTypes.end()) {
587 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
590 itTypes = outputTypes.find("group");
591 if (itTypes != outputTypes.end()) {
592 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
595 itTypes = outputTypes.find("count");
596 if (itTypes != outputTypes.end()) {
597 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
600 m->mothurOutEndLine();
601 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
602 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
603 m->mothurOutEndLine();
608 catch(exception& e) {
609 m->errorOut(e, "TrimSeqsCommand", "execute");
614 /**************************************************************************************/
615 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
619 ofstream trimFASTAFile;
620 m->openOutputFile(trimFileName, trimFASTAFile);
622 ofstream scrapFASTAFile;
623 m->openOutputFile(scrapFileName, scrapFASTAFile);
625 ofstream trimQualFile;
626 ofstream scrapQualFile;
628 m->openOutputFile(trimQFileName, trimQualFile);
629 m->openOutputFile(scrapQFileName, scrapQualFile);
632 ofstream trimNameFile;
633 ofstream scrapNameFile;
635 m->openOutputFile(trimNFileName, trimNameFile);
636 m->openOutputFile(scrapNFileName, scrapNameFile);
639 ofstream trimCountFile;
640 ofstream scrapCountFile;
642 m->openOutputFile(trimCFileName, trimCountFile);
643 m->openOutputFile(scrapCFileName, scrapCountFile);
644 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
647 ofstream outGroupsFile;
648 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
650 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
651 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
652 if (fastaFileNames[i][j] != "") {
654 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
656 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
660 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
668 m->openInputFile(filename, inFASTA);
669 inFASTA.seekg(line.start);
672 if(qFileName != "") {
673 m->openInputFile(qFileName, qFile);
674 qFile.seekg(qline.start);
679 int numBarcodes = barcodes.size();
680 TrimOligos* trimOligos = NULL;
681 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
682 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
684 TrimOligos* rtrimOligos = NULL;
686 //create reoriented primer and barcode pairs
687 map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
688 for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
689 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
690 rpairedPrimers[it->first] = tempPair;
691 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
693 for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
694 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
695 rpairedBarcodes[it->first] = tempPair;
696 //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
698 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
703 if (m->control_pressed) {
704 delete trimOligos; if (reorient) { delete rtrimOligos; }
705 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
706 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
707 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
708 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
709 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
710 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
714 string trashCode = "";
715 int currentSeqsDiffs = 0;
717 Sequence currSeq(inFASTA); m->gobble(inFASTA);
718 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
719 Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
721 QualityScores currQual; QualityScores savedQual;
723 currQual = QualityScores(qFile); m->gobble(qFile);
724 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
725 //cout << currQual.getName() << endl;
728 string origSeq = currSeq.getUnaligned();
731 int barcodeIndex = 0;
735 success = trimOligos->stripLinker(currSeq, currQual);
736 if(success > ldiffs) { trashCode += 'k'; }
737 else{ currentSeqsDiffs += success; }
741 if(numBarcodes != 0){
742 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
743 if(success > bdiffs) {
746 else{ currentSeqsDiffs += success; }
750 success = trimOligos->stripSpacer(currSeq, currQual);
751 if(success > sdiffs) { trashCode += 's'; }
752 else{ currentSeqsDiffs += success; }
756 if(numFPrimers != 0){
757 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
758 if(success > pdiffs) {
761 else{ currentSeqsDiffs += success; }
764 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
766 if(numRPrimers != 0){
767 success = trimOligos->stripReverse(currSeq, currQual);
768 if(!success) { trashCode += 'r'; }
771 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
773 string thisTrashCode = "";
774 int thisCurrentSeqsDiffs = 0;
776 int thisBarcodeIndex = 0;
777 int thisPrimerIndex = 0;
779 if(numBarcodes != 0){
780 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
781 if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
782 else{ thisCurrentSeqsDiffs += thisSuccess; }
785 if(numFPrimers != 0){
786 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
787 if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
788 else{ thisCurrentSeqsDiffs += thisSuccess; }
791 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
793 if (thisTrashCode == "") {
794 trashCode = thisTrashCode;
795 success = thisSuccess;
796 currentSeqsDiffs = thisCurrentSeqsDiffs;
797 barcodeIndex = thisBarcodeIndex;
798 primerIndex = thisPrimerIndex;
799 savedSeq.reverseComplement();
800 currSeq.setAligned(savedSeq.getAligned());
802 savedQual.flipQScores();
803 currQual.setScores(savedQual.getScores());
805 }else { trashCode += "(" + thisTrashCode + ")"; }
809 success = keepFirstTrim(currSeq, currQual);
813 success = removeLastTrim(currSeq, currQual);
814 if(!success) { trashCode += 'l'; }
819 int origLength = currSeq.getNumBases();
821 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
822 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
823 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
824 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
825 else { success = 1; }
827 //you don't want to trim, if it fails above then scrap it
828 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
830 if(!success) { trashCode += 'q'; }
833 if(minLength > 0 || maxLength > 0){
834 success = cullLength(currSeq);
835 if(!success) { trashCode += 'l'; }
838 success = cullHomoP(currSeq);
839 if(!success) { trashCode += 'h'; }
842 success = cullAmbigs(currSeq);
843 if(!success) { trashCode += 'n'; }
846 if(flip){ // should go last
847 currSeq.reverseComplement();
849 currQual.flipQScores();
853 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
855 if(trashCode.length() == 0){
856 string thisGroup = "";
858 if(numBarcodes != 0){
859 thisGroup = barcodeNameVector[barcodeIndex];
860 if (numFPrimers != 0) {
861 if (primerNameVector[primerIndex] != "") {
862 if(thisGroup != "") {
863 thisGroup += "." + primerNameVector[primerIndex];
865 thisGroup = primerNameVector[primerIndex];
872 int pos = thisGroup.find("ignore");
873 if (pos == string::npos) {
874 currSeq.setAligned(currSeq.getUnaligned());
875 currSeq.printSequence(trimFASTAFile);
878 currQual.printQScores(trimQualFile);
883 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
884 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
885 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
888 int numRedundants = 0;
889 if (countfile != "") {
890 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
891 if (itCount != nameCount.end()) {
892 trimCountFile << itCount->first << '\t' << itCount->second << endl;
893 numRedundants = itCount->second-1;
894 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
898 if(numBarcodes != 0){
900 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
902 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
903 else { groupMap[currSeq.getName()] = thisGroup; }
905 if (nameFile != "") {
906 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
907 if (itName != nameMap.end()) {
908 vector<string> thisSeqsNames;
909 m->splitAtChar(itName->second, thisSeqsNames, ',');
910 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
911 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
912 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
914 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
917 map<string, int>::iterator it = groupCounts.find(thisGroup);
918 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
919 else { groupCounts[it->first] += (1 + numRedundants); }
926 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
927 currSeq.printSequence(output);
931 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
932 currQual.printQScores(output);
937 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
938 if (itName != nameMap.end()) {
939 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
940 output << itName->first << '\t' << itName->second << endl;
942 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
948 if(nameFile != ""){ //needs to be before the currSeq name is changed
949 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
950 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
951 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
953 if (countfile != "") {
954 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
955 if (itCount != nameCount.end()) {
956 trimCountFile << itCount->first << '\t' << itCount->second << endl;
957 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
960 currSeq.setName(currSeq.getName() + '|' + trashCode);
961 currSeq.setUnaligned(origSeq);
962 currSeq.setAligned(origSeq);
963 currSeq.printSequence(scrapFASTAFile);
965 currQual.printQScores(scrapQualFile);
971 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
972 unsigned long long pos = inFASTA.tellg();
973 if ((pos == -1) || (pos >= line.end)) { break; }
976 if (inFASTA.eof()) { break; }
980 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
984 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
987 if (reorient) { delete rtrimOligos; }
989 trimFASTAFile.close();
990 scrapFASTAFile.close();
991 if (createGroup) { outGroupsFile.close(); }
992 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
993 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
994 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
998 catch(exception& e) {
999 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
1004 /**************************************************************************************************/
1006 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
1010 int exitCommand = 1;
1013 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1014 //loop through and create all the processes you want
1015 while (process != processors) {
1019 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1021 }else if (pid == 0){
1023 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1024 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1025 vector<vector<string> > tempNameFileNames = nameFileNames;
1030 for(int i=0;i<tempFASTAFileNames.size();i++){
1031 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1032 if (tempFASTAFileNames[i][j] != "") {
1033 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1034 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1036 if(qFileName != ""){
1037 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1038 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1041 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1042 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1049 driverCreateTrim(filename,
1051 (trimFASTAFileName + toString(getpid()) + ".temp"),
1052 (scrapFASTAFileName + toString(getpid()) + ".temp"),
1053 (trimQualFileName + toString(getpid()) + ".temp"),
1054 (scrapQualFileName + toString(getpid()) + ".temp"),
1055 (trimNameFileName + toString(getpid()) + ".temp"),
1056 (scrapNameFileName + toString(getpid()) + ".temp"),
1057 (trimCountFileName + toString(getpid()) + ".temp"),
1058 (scrapCountFileName + toString(getpid()) + ".temp"),
1059 (groupFile + toString(getpid()) + ".temp"),
1061 tempPrimerQualFileNames,
1066 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1068 //pass groupCounts to parent
1071 string tempFile = filename + toString(getpid()) + ".num.temp";
1072 m->openOutputFile(tempFile, out);
1074 out << groupCounts.size() << endl;
1076 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1077 out << it->first << '\t' << it->second << endl;
1080 out << groupMap.size() << endl;
1081 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1082 out << it->first << '\t' << it->second << endl;
1088 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1089 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1096 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1097 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1098 if(qFileName != ""){
1099 m->openOutputFile(trimQualFileName, temp); temp.close();
1100 m->openOutputFile(scrapQualFileName, temp); temp.close();
1102 if (nameFile != "") {
1103 m->openOutputFile(trimNameFileName, temp); temp.close();
1104 m->openOutputFile(scrapNameFileName, temp); temp.close();
1106 if (countfile != "") {
1107 m->openOutputFile(trimCountFileName, temp); temp.close();
1108 m->openOutputFile(scrapCountFileName, temp); temp.close();
1111 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1113 //force parent to wait until all the processes are done
1114 for (int i=0;i<processIDS.size();i++) {
1115 int temp = processIDS[i];
1119 //////////////////////////////////////////////////////////////////////////////////////////////////////
1120 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1121 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1122 //////////////////////////////////////////////////////////////////////////////////////////////////////
1124 vector<trimData*> pDataArray;
1125 DWORD dwThreadIdArray[processors-1];
1126 HANDLE hThreadArray[processors-1];
1128 //Create processor worker threads.
1129 for( int h=0; h<processors-1; h++){
1131 string extension = "";
1132 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1133 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1134 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1135 vector<vector<string> > tempNameFileNames = nameFileNames;
1140 for(int i=0;i<tempFASTAFileNames.size();i++){
1141 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1142 if (tempFASTAFileNames[i][j] != "") {
1143 tempFASTAFileNames[i][j] += extension;
1144 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1146 if(qFileName != ""){
1147 tempPrimerQualFileNames[i][j] += extension;
1148 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1151 tempNameFileNames[i][j] += extension;
1152 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1160 trimData* tempTrim = new trimData(filename,
1161 qFileName, nameFile, countfile,
1162 (trimFASTAFileName+extension),
1163 (scrapFASTAFileName+extension),
1164 (trimQualFileName+extension),
1165 (scrapQualFileName+extension),
1166 (trimNameFileName+extension),
1167 (scrapNameFileName+extension),
1168 (trimCountFileName+extension),
1169 (scrapCountFileName+extension),
1170 (groupFile+extension),
1172 tempPrimerQualFileNames,
1174 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1175 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1176 primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1177 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1178 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1179 pDataArray.push_back(tempTrim);
1181 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1186 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1187 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1188 if(qFileName != ""){
1189 m->openOutputFile(trimQualFileName, temp); temp.close();
1190 m->openOutputFile(scrapQualFileName, temp); temp.close();
1192 if (nameFile != "") {
1193 m->openOutputFile(trimNameFileName, temp); temp.close();
1194 m->openOutputFile(scrapNameFileName, temp); temp.close();
1196 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1197 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1198 vector<vector<string> > tempNameFileNames = nameFileNames;
1201 string extension = toString(processors-1) + ".temp";
1202 for(int i=0;i<tempFASTAFileNames.size();i++){
1203 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1204 if (tempFASTAFileNames[i][j] != "") {
1205 tempFASTAFileNames[i][j] += extension;
1206 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1208 if(qFileName != ""){
1209 tempPrimerQualFileNames[i][j] += extension;
1210 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1213 tempNameFileNames[i][j] += extension;
1214 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1221 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1222 processIDS.push_back(processors-1);
1225 //Wait until all threads have terminated.
1226 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1228 //Close all thread handles and free memory allocations.
1229 for(int i=0; i < pDataArray.size(); i++){
1230 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1231 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1233 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1234 map<string, int>::iterator it2 = groupCounts.find(it->first);
1235 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1236 else { groupCounts[it->first] += it->second; }
1238 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1239 map<string, string>::iterator it2 = groupMap.find(it->first);
1240 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1241 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1243 CloseHandle(hThreadArray[i]);
1244 delete pDataArray[i];
1251 for(int i=0;i<processIDS.size();i++){
1253 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1255 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1256 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1257 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1258 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1260 if(qFileName != ""){
1261 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1262 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1263 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1264 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1268 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1269 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1270 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1271 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1274 if(countfile != ""){
1275 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1276 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1277 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1278 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1281 if((createGroup)&&(countfile == "")){
1282 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1283 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1288 for(int j=0;j<fastaFileNames.size();j++){
1289 for(int k=0;k<fastaFileNames[j].size();k++){
1290 if (fastaFileNames[j][k] != "") {
1291 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1292 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1294 if(qFileName != ""){
1295 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1296 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1300 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1301 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1308 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1311 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1312 m->openInputFile(tempFile, in);
1316 in >> tempNum; m->gobble(in);
1319 for (int i = 0; i < tempNum; i++) {
1321 in >> group >> groupNum; m->gobble(in);
1323 map<string, int>::iterator it = groupCounts.find(group);
1324 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1325 else { groupCounts[it->first] += groupNum; }
1328 in >> tempNum; m->gobble(in);
1330 for (int i = 0; i < tempNum; i++) {
1331 string group, seqName;
1332 in >> seqName >> group; m->gobble(in);
1334 map<string, string>::iterator it = groupMap.find(seqName);
1335 if (it == groupMap.end()) { groupMap[seqName] = group; }
1336 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1340 in.close(); m->mothurRemove(tempFile);
1347 catch(exception& e) {
1348 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1353 /**************************************************************************************************/
1355 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1358 vector<unsigned long long> fastaFilePos;
1359 vector<unsigned long long> qfileFilePos;
1361 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1362 //set file positions for fasta file
1363 fastaFilePos = m->divideFile(filename, processors);
1365 //get name of first sequence in each chunk
1366 map<string, int> firstSeqNames;
1367 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1369 m->openInputFile(filename, in);
1370 in.seekg(fastaFilePos[i]);
1373 firstSeqNames[temp.getName()] = i;
1378 if(qfilename != "") {
1379 //seach for filePos of each first name in the qfile and save in qfileFilePos
1381 m->openInputFile(qfilename, inQual);
1384 while(!inQual.eof()){
1385 input = m->getline(inQual);
1387 if (input.length() != 0) {
1388 if(input[0] == '>'){ //this is a sequence name line
1389 istringstream nameStream(input);
1391 string sname = ""; nameStream >> sname;
1392 sname = sname.substr(1);
1394 for (int i = 0; i < sname.length(); i++) {
1395 if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
1398 map<string, int>::iterator it = firstSeqNames.find(sname);
1400 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1401 unsigned long long pos = inQual.tellg();
1402 qfileFilePos.push_back(pos - input.length() - 1);
1403 firstSeqNames.erase(it);
1408 if (firstSeqNames.size() == 0) { break; }
1413 if (firstSeqNames.size() != 0) {
1414 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1415 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1421 //get last file position of qfile
1423 unsigned long long size;
1425 //get num bytes in file
1426 pFile = fopen (qfilename.c_str(),"rb");
1427 if (pFile==NULL) perror ("Error opening file");
1429 fseek (pFile, 0, SEEK_END);
1434 qfileFilePos.push_back(size);
1437 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1438 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1439 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1440 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1442 if(qfilename == "") { qLines = lines; } //files with duds
1448 if (processors == 1) { //save time
1449 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1450 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1451 lines.push_back(linePair(0, 1000));
1452 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1454 int numFastaSeqs = 0;
1455 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1456 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1458 if (qfilename != "") {
1459 int numQualSeqs = 0;
1460 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1462 if (numFastaSeqs != numQualSeqs) {
1463 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1467 //figure out how many sequences you have to process
1468 int numSeqsPerProcessor = numFastaSeqs / processors;
1469 for (int i = 0; i < processors; i++) {
1470 int startIndex = i * numSeqsPerProcessor;
1471 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1472 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1473 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1476 if(qfilename == "") { qLines = lines; } //files with duds
1481 catch(exception& e) {
1482 m->errorOut(e, "TrimSeqsCommand", "setLines");
1487 //***************************************************************************************************************
1489 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1492 m->openInputFile(oligoFile, inOligos);
1496 string type, oligo, roligo, group;
1497 bool hasPrimer = false; bool hasPairedBarcodes = false;
1499 int indexPrimer = 0;
1500 int indexBarcode = 0;
1501 int indexPairedPrimer = 0;
1502 int indexPairedBarcode = 0;
1503 set<string> uniquePrimers;
1504 set<string> uniqueBarcodes;
1506 while(!inOligos.eof()){
1510 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1513 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1514 m->gobble(inOligos);
1517 m->gobble(inOligos);
1518 //make type case insensitive
1519 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1523 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1525 for(int i=0;i<oligo.length();i++){
1526 oligo[i] = toupper(oligo[i]);
1527 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1530 if(type == "FORWARD"){
1533 // get rest of line in case there is a primer name
1534 while (!inOligos.eof()) {
1535 char c = inOligos.get();
1536 if (c == 10 || c == 13 || c == -1){ break; }
1537 else if (c == 32 || c == 9){;} //space or tab
1538 else { group += c; }
1541 //check for repeat barcodes
1542 map<string, int>::iterator itPrime = primers.find(oligo);
1543 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1545 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1547 primers[oligo]=indexPrimer; indexPrimer++;
1548 primerNameVector.push_back(group);
1550 else if (type == "PRIMER"){
1551 m->gobble(inOligos);
1555 for(int i=0;i<roligo.length();i++){
1556 roligo[i] = toupper(roligo[i]);
1557 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1559 roligo = reverseOligo(roligo);
1563 // get rest of line in case there is a primer name
1564 while (!inOligos.eof()) {
1565 char c = inOligos.get();
1566 if (c == 10 || c == 13 || c == -1){ break; }
1567 else if (c == 32 || c == 9){;} //space or tab
1568 else { group += c; }
1571 oligosPair newPrimer(oligo, roligo);
1573 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1575 //check for repeat barcodes
1576 string tempPair = oligo+roligo;
1577 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1578 else { uniquePrimers.insert(tempPair); }
1580 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1582 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1583 primerNameVector.push_back(group);
1586 else if(type == "REVERSE"){
1587 //Sequence oligoRC("reverse", oligo);
1588 //oligoRC.reverseComplement();
1589 string oligoRC = reverseOligo(oligo);
1590 revPrimer.push_back(oligoRC);
1592 else if(type == "BARCODE"){
1595 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1596 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1599 while (!inOligos.eof()) {
1600 char c = inOligos.get();
1601 if (c == 10 || c == 13 || c == -1){ break; }
1602 else if (c == 32 || c == 9){;} //space or tab
1606 //then this is illumina data with 4 columns
1608 hasPairedBarcodes = true;
1609 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1612 for(int i=0;i<reverseBarcode.length();i++){
1613 reverseBarcode[i] = toupper(reverseBarcode[i]);
1614 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1617 reverseBarcode = reverseOligo(reverseBarcode);
1618 oligosPair newPair(oligo, reverseBarcode);
1620 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1622 //check for repeat barcodes
1623 string tempPair = oligo+reverseBarcode;
1624 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1625 else { uniqueBarcodes.insert(tempPair); }
1627 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1628 barcodeNameVector.push_back(group);
1630 //check for repeat barcodes
1631 map<string, int>::iterator itBar = barcodes.find(oligo);
1632 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1634 barcodes[oligo]=indexBarcode; indexBarcode++;
1635 barcodeNameVector.push_back(group);
1637 }else if(type == "LINKER"){
1638 linker.push_back(oligo);
1639 }else if(type == "SPACER"){
1640 spacer.push_back(oligo);
1642 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1644 m->gobble(inOligos);
1648 if (hasPairedBarcodes || hasPrimer) {
1649 pairedOligos = true;
1650 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1651 }else if (reorient) { m->mothurOut("[Warning]: cannot use checkorient without paired barcodes or primers, ignoring.\n"); m->mothurOutEndLine(); reorient = false; }
1653 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1655 //add in potential combos
1656 if(barcodeNameVector.size() == 0){
1658 barcodeNameVector.push_back("");
1661 if(primerNameVector.size() == 0){
1663 primerNameVector.push_back("");
1666 fastaFileNames.resize(barcodeNameVector.size());
1667 for(int i=0;i<fastaFileNames.size();i++){
1668 fastaFileNames[i].assign(primerNameVector.size(), "");
1670 if(qFileName != "") { qualFileNames = fastaFileNames; }
1671 if(nameFile != "") { nameFileNames = fastaFileNames; }
1674 set<string> uniqueNames; //used to cleanup outputFileNames
1676 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1677 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1679 string primerName = primerNameVector[itPrimer->first];
1680 string barcodeName = barcodeNameVector[itBar->first];
1682 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1684 string comboGroupName = "";
1685 string fastaFileName = "";
1686 string qualFileName = "";
1687 string nameFileName = "";
1688 string countFileName = "";
1690 if(primerName == ""){
1691 comboGroupName = barcodeNameVector[itBar->first];
1694 if(barcodeName == ""){
1695 comboGroupName = primerNameVector[itPrimer->first];
1698 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1704 map<string, string> variables;
1705 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1706 variables["[tag]"] = comboGroupName;
1707 fastaFileName = getOutputFileName("fasta", variables);
1708 if (uniqueNames.count(fastaFileName) == 0) {
1709 outputNames.push_back(fastaFileName);
1710 outputTypes["fasta"].push_back(fastaFileName);
1711 uniqueNames.insert(fastaFileName);
1714 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1715 m->openOutputFile(fastaFileName, temp); temp.close();
1717 if(qFileName != ""){
1718 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1719 qualFileName = getOutputFileName("qfile", variables);
1720 if (uniqueNames.count(qualFileName) == 0) {
1721 outputNames.push_back(qualFileName);
1722 outputTypes["qfile"].push_back(qualFileName);
1725 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1726 m->openOutputFile(qualFileName, temp); temp.close();
1730 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1731 nameFileName = getOutputFileName("name", variables);
1732 if (uniqueNames.count(nameFileName) == 0) {
1733 outputNames.push_back(nameFileName);
1734 outputTypes["name"].push_back(nameFileName);
1737 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1738 m->openOutputFile(nameFileName, temp); temp.close();
1744 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1745 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1747 string primerName = primerNameVector[itPrimer->second];
1748 string barcodeName = barcodeNameVector[itBar->second];
1750 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1752 string comboGroupName = "";
1753 string fastaFileName = "";
1754 string qualFileName = "";
1755 string nameFileName = "";
1756 string countFileName = "";
1758 if(primerName == ""){
1759 comboGroupName = barcodeNameVector[itBar->second];
1762 if(barcodeName == ""){
1763 comboGroupName = primerNameVector[itPrimer->second];
1766 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1772 map<string, string> variables;
1773 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1774 variables["[tag]"] = comboGroupName;
1775 fastaFileName = getOutputFileName("fasta", variables);
1776 if (uniqueNames.count(fastaFileName) == 0) {
1777 outputNames.push_back(fastaFileName);
1778 outputTypes["fasta"].push_back(fastaFileName);
1779 uniqueNames.insert(fastaFileName);
1782 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1783 m->openOutputFile(fastaFileName, temp); temp.close();
1785 if(qFileName != ""){
1786 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1787 qualFileName = getOutputFileName("qfile", variables);
1788 if (uniqueNames.count(qualFileName) == 0) {
1789 outputNames.push_back(qualFileName);
1790 outputTypes["qfile"].push_back(qualFileName);
1793 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1794 m->openOutputFile(qualFileName, temp); temp.close();
1798 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1799 nameFileName = getOutputFileName("name", variables);
1800 if (uniqueNames.count(nameFileName) == 0) {
1801 outputNames.push_back(nameFileName);
1802 outputTypes["name"].push_back(nameFileName);
1805 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1806 m->openOutputFile(nameFileName, temp); temp.close();
1813 numFPrimers = primers.size();
1814 if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
1815 numRPrimers = revPrimer.size();
1816 numLinkers = linker.size();
1817 numSpacers = spacer.size();
1819 bool allBlank = true;
1820 for (int i = 0; i < barcodeNameVector.size(); i++) {
1821 if (barcodeNameVector[i] != "") {
1826 for (int i = 0; i < primerNameVector.size(); i++) {
1827 if (primerNameVector[i] != "") {
1834 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1842 catch(exception& e) {
1843 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1847 //***************************************************************************************************************
1849 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1852 if(qscores.getName() != ""){
1853 qscores.trimQScores(-1, keepFirst);
1856 // sequence.printSequence(cout);cout << endl;
1858 sequence.trim(keepFirst);
1860 // sequence.printSequence(cout);cout << endl << endl;;
1864 catch(exception& e) {
1865 m->errorOut(e, "keepFirstTrim", "countDiffs");
1871 //***************************************************************************************************************
1873 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1877 int length = sequence.getNumBases() - removeLast;
1880 if(qscores.getName() != ""){
1881 qscores.trimQScores(-1, length);
1883 sequence.trim(length);
1892 catch(exception& e) {
1893 m->errorOut(e, "removeLastTrim", "countDiffs");
1899 //***************************************************************************************************************
1901 bool TrimSeqsCommand::cullLength(Sequence& seq){
1904 int length = seq.getNumBases();
1905 bool success = 0; //guilty until proven innocent
1907 if(length >= minLength && maxLength == 0) { success = 1; }
1908 else if(length >= minLength && length <= maxLength) { success = 1; }
1909 else { success = 0; }
1914 catch(exception& e) {
1915 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1921 //***************************************************************************************************************
1923 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1925 int longHomoP = seq.getLongHomoPolymer();
1926 bool success = 0; //guilty until proven innocent
1928 if(longHomoP <= maxHomoP){ success = 1; }
1929 else { success = 0; }
1933 catch(exception& e) {
1934 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1939 //********************************************************************/
1940 string TrimSeqsCommand::reverseOligo(string oligo){
1942 string reverse = "";
1944 for(int i=oligo.length()-1;i>=0;i--){
1946 if(oligo[i] == 'A') { reverse += 'T'; }
1947 else if(oligo[i] == 'T'){ reverse += 'A'; }
1948 else if(oligo[i] == 'U'){ reverse += 'A'; }
1950 else if(oligo[i] == 'G'){ reverse += 'C'; }
1951 else if(oligo[i] == 'C'){ reverse += 'G'; }
1953 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1954 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1956 else if(oligo[i] == 'M'){ reverse += 'K'; }
1957 else if(oligo[i] == 'K'){ reverse += 'M'; }
1959 else if(oligo[i] == 'W'){ reverse += 'W'; }
1960 else if(oligo[i] == 'S'){ reverse += 'S'; }
1962 else if(oligo[i] == 'B'){ reverse += 'V'; }
1963 else if(oligo[i] == 'V'){ reverse += 'B'; }
1965 else if(oligo[i] == 'D'){ reverse += 'H'; }
1966 else if(oligo[i] == 'H'){ reverse += 'D'; }
1968 else { reverse += 'N'; }
1974 catch(exception& e) {
1975 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1980 //***************************************************************************************************************
1982 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1984 int numNs = seq.getAmbigBases();
1985 bool success = 0; //guilty until proven innocent
1987 if(numNs <= maxAmbig) { success = 1; }
1988 else { success = 0; }
1992 catch(exception& e) {
1993 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1998 //***************************************************************************************************************