5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
16 //**********************************************************************************************************************
17 vector<string> TrimSeqsCommand::setParameters(){
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
20 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
21 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
22 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
24 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
25 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
26 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
27 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
28 CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength);
29 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
30 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
31 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
32 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
33 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
34 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
35 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
36 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
37 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
38 CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform);
39 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
40 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
41 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
42 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
43 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
44 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
45 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
46 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
47 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
48 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
49 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
51 vector<string> myArray;
52 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
56 m->errorOut(e, "TrimSeqsCommand", "setParameters");
60 //**********************************************************************************************************************
61 string TrimSeqsCommand::getHelpString(){
63 string helpString = "";
64 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
65 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
66 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
67 helpString += "The fasta parameter is required.\n";
68 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
69 helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
70 helpString += "The oligos parameter allows you to provide an oligos file.\n";
71 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
72 helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
73 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
74 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
75 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
76 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
77 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
78 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
79 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
80 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
81 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
82 helpString += "The qfile parameter allows you to provide a quality file.\n";
83 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
84 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
85 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
86 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
87 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
88 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
89 helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n";
90 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
91 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
92 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
93 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
94 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
95 helpString += "The trim.seqs command should be in the following format: \n";
96 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
97 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
98 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
99 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
100 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
103 catch(exception& e) {
104 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
108 //**********************************************************************************************************************
109 string TrimSeqsCommand::getOutputPattern(string type) {
113 if (type == "qfile") { pattern = "[filename],[tag],qual"; }
114 else if (type == "fasta") { pattern = "[filename],[tag],fasta"; }
115 else if (type == "group") { pattern = "[filename],groups"; }
116 else if (type == "name") { pattern = "[filename],[tag],names"; }
117 else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; }
118 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
122 catch(exception& e) {
123 m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
127 //**********************************************************************************************************************
129 TrimSeqsCommand::TrimSeqsCommand(){
131 abort = true; calledHelp = true;
133 vector<string> tempOutNames;
134 outputTypes["fasta"] = tempOutNames;
135 outputTypes["qfile"] = tempOutNames;
136 outputTypes["group"] = tempOutNames;
137 outputTypes["name"] = tempOutNames;
138 outputTypes["count"] = tempOutNames;
140 catch(exception& e) {
141 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
145 //***************************************************************************************************************
147 TrimSeqsCommand::TrimSeqsCommand(string option) {
150 abort = false; calledHelp = false;
153 //allow user to run help
154 if(option == "help") { help(); abort = true; calledHelp = true; }
155 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
158 vector<string> myArray = setParameters();
160 OptionParser parser(option);
161 map<string,string> parameters = parser.getParameters();
163 ValidParameters validParameter;
164 map<string,string>::iterator it;
166 //check to make sure all parameters are valid for command
167 for (it = parameters.begin(); it != parameters.end(); it++) {
168 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
171 //initialize outputTypes
172 vector<string> tempOutNames;
173 outputTypes["fasta"] = tempOutNames;
174 outputTypes["qfile"] = tempOutNames;
175 outputTypes["group"] = tempOutNames;
176 outputTypes["name"] = tempOutNames;
177 outputTypes["count"] = tempOutNames;
179 //if the user changes the input directory command factory will send this info to us in the output parameter
180 string inputDir = validParameter.validFile(parameters, "inputdir", false);
181 if (inputDir == "not found"){ inputDir = ""; }
184 it = parameters.find("fasta");
185 //user has given a template file
186 if(it != parameters.end()){
187 path = m->hasPath(it->second);
188 //if the user has not given a path then, add inputdir. else leave path alone.
189 if (path == "") { parameters["fasta"] = inputDir + it->second; }
192 it = parameters.find("oligos");
193 //user has given a template file
194 if(it != parameters.end()){
195 path = m->hasPath(it->second);
196 //if the user has not given a path then, add inputdir. else leave path alone.
197 if (path == "") { parameters["oligos"] = inputDir + it->second; }
200 it = parameters.find("qfile");
201 //user has given a template file
202 if(it != parameters.end()){
203 path = m->hasPath(it->second);
204 //if the user has not given a path then, add inputdir. else leave path alone.
205 if (path == "") { parameters["qfile"] = inputDir + it->second; }
208 it = parameters.find("name");
209 //user has given a template file
210 if(it != parameters.end()){
211 path = m->hasPath(it->second);
212 //if the user has not given a path then, add inputdir. else leave path alone.
213 if (path == "") { parameters["name"] = inputDir + it->second; }
216 it = parameters.find("count");
217 //user has given a template file
218 if(it != parameters.end()){
219 path = m->hasPath(it->second);
220 //if the user has not given a path then, add inputdir. else leave path alone.
221 if (path == "") { parameters["count"] = inputDir + it->second; }
227 //check for required parameters
228 fastaFile = validParameter.validFile(parameters, "fasta", true);
229 if (fastaFile == "not found") {
230 fastaFile = m->getFastaFile();
231 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
232 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
233 }else if (fastaFile == "not open") { abort = true; }
234 else { m->setFastaFile(fastaFile); }
236 //if the user changes the output directory command factory will send this info to us in the output parameter
237 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
239 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
243 //check for optional parameter and set defaults
244 // ...at some point should added some additional type checking...
246 temp = validParameter.validFile(parameters, "flip", false);
247 if (temp == "not found") { flip = 0; }
248 else { flip = m->isTrue(temp); }
250 temp = validParameter.validFile(parameters, "oligos", true);
251 if (temp == "not found"){ oligoFile = ""; }
252 else if(temp == "not open"){ abort = true; }
253 else { oligoFile = temp; m->setOligosFile(oligoFile); }
256 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
257 m->mothurConvert(temp, maxAmbig);
259 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
260 m->mothurConvert(temp, maxHomoP);
262 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "1"; }
263 m->mothurConvert(temp, minLength);
265 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
266 m->mothurConvert(temp, maxLength);
268 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
269 m->mothurConvert(temp, bdiffs);
271 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
272 m->mothurConvert(temp, pdiffs);
274 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
275 m->mothurConvert(temp, ldiffs);
277 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
278 m->mothurConvert(temp, sdiffs);
280 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
281 m->mothurConvert(temp, tdiffs);
283 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
285 temp = validParameter.validFile(parameters, "qfile", true);
286 if (temp == "not found") { qFileName = ""; }
287 else if(temp == "not open") { abort = true; }
288 else { qFileName = temp; m->setQualFile(qFileName); }
290 temp = validParameter.validFile(parameters, "name", true);
291 if (temp == "not found") { nameFile = ""; }
292 else if(temp == "not open") { nameFile = ""; abort = true; }
293 else { nameFile = temp; m->setNameFile(nameFile); }
295 countfile = validParameter.validFile(parameters, "count", true);
296 if (countfile == "not open") { abort = true; countfile = ""; }
297 else if (countfile == "not found") { countfile = ""; }
298 else { m->setCountTableFile(countfile); }
300 if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
302 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
303 m->mothurConvert(temp, qThreshold);
305 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
306 qtrim = m->isTrue(temp);
308 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
309 convert(temp, qRollAverage);
311 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
312 convert(temp, qWindowAverage);
314 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
315 convert(temp, qWindowSize);
317 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
318 convert(temp, qWindowStep);
320 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
321 convert(temp, qAverage);
323 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
324 convert(temp, keepFirst);
326 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
327 convert(temp, removeLast);
329 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
330 allFiles = m->isTrue(temp);
332 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
333 keepforward = m->isTrue(temp);
335 temp = validParameter.validFile(parameters, "logtransform", false); if (temp == "not found") { temp = "F"; }
336 logtransform = m->isTrue(temp);
338 temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
339 reorient = m->isTrue(temp);
341 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
342 m->setProcessors(temp);
343 m->mothurConvert(temp, processors);
346 if(allFiles && (oligoFile == "")){
347 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
349 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
350 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
354 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
355 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
359 if (countfile == "") {
360 if (nameFile == "") {
361 vector<string> files; files.push_back(fastaFile);
362 parser.getNameFile(files);
368 catch(exception& e) {
369 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
373 //***************************************************************************************************************
375 int TrimSeqsCommand::execute(){
378 if (abort == true) { if (calledHelp) { return 0; } return 2; }
380 pairedOligos = false;
381 numFPrimers = 0; //this needs to be initialized
386 vector<vector<string> > fastaFileNames;
387 vector<vector<string> > qualFileNames;
388 vector<vector<string> > nameFileNames;
389 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391 map<string, string> variables;
392 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
393 variables["[tag]"] = "trim";
394 string trimSeqFile = getOutputFileName("fasta",variables);
395 string trimQualFile = getOutputFileName("qfile",variables);
396 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
398 variables["[tag]"] = "scrap";
399 string scrapSeqFile = getOutputFileName("fasta",variables);
400 string scrapQualFile = getOutputFileName("qfile",variables);
401 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
403 if (qFileName != "") {
404 outputNames.push_back(trimQualFile);
405 outputNames.push_back(scrapQualFile);
406 outputTypes["qfile"].push_back(trimQualFile);
407 outputTypes["qfile"].push_back(scrapQualFile);
410 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
411 variables["[tag]"] = "trim";
412 string trimNameFile = getOutputFileName("name",variables);
413 variables["[tag]"] = "scrap";
414 string scrapNameFile = getOutputFileName("name",variables);
416 if (nameFile != "") {
417 m->readNames(nameFile, nameMap);
418 outputNames.push_back(trimNameFile);
419 outputNames.push_back(scrapNameFile);
420 outputTypes["name"].push_back(trimNameFile);
421 outputTypes["name"].push_back(scrapNameFile);
424 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
425 variables["[tag]"] = "trim";
426 string trimCountFile = getOutputFileName("count",variables);
427 variables["[tag]"] = "scrap";
428 string scrapCountFile = getOutputFileName("count",variables);
430 if (countfile != "") {
432 ct.readTable(countfile, true, false);
433 nameCount = ct.getNameMap();
434 outputNames.push_back(trimCountFile);
435 outputNames.push_back(scrapCountFile);
436 outputTypes["count"].push_back(trimCountFile);
437 outputTypes["count"].push_back(scrapCountFile);
441 if (m->control_pressed) { return 0; }
443 string outputGroupFileName;
445 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames, uniqueFastaNames);
446 if ((createGroup) && (countfile == "")){
447 map<string, string> myvariables;
448 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
449 outputGroupFileName = getOutputFileName("group",myvariables);
450 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
454 if (m->control_pressed) { return 0; }
456 //fills lines and qlines
457 setLines(fastaFile, qFileName);
460 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
462 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
466 if (m->control_pressed) { return 0; }
469 map<string, string>::iterator it;
470 set<string> namesToRemove;
471 for(int i=0;i<fastaFileNames.size();i++){
472 for(int j=0;j<fastaFileNames[0].size();j++){
473 if (fastaFileNames[i][j] != "") {
474 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
475 if(m->isBlank(fastaFileNames[i][j])){
476 m->mothurRemove(fastaFileNames[i][j]);
477 namesToRemove.insert(fastaFileNames[i][j]);
480 m->mothurRemove(qualFileNames[i][j]);
481 namesToRemove.insert(qualFileNames[i][j]);
485 m->mothurRemove(nameFileNames[i][j]);
486 namesToRemove.insert(nameFileNames[i][j]);
488 uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print
495 //remove names for outputFileNames, just cleans up the output
496 vector<string> outputNames2;
497 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
498 outputNames = outputNames2;
500 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
502 m->openInputFile(it->first, in);
505 map<string, string> myvariables;
506 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
507 string thisGroupName = "";
508 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
509 else { thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
510 m->openOutputFile(thisGroupName, out);
512 if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
515 if (m->control_pressed) { break; }
517 Sequence currSeq(in); m->gobble(in);
518 if (countfile == "") {
519 out << currSeq.getName() << '\t' << it->second << endl;
521 if (nameFile != "") {
522 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
523 if (itName != nameMap.end()) {
524 vector<string> thisSeqsNames;
525 m->splitAtChar(itName->second, thisSeqsNames, ',');
526 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
527 out << thisSeqsNames[k] << '\t' << it->second << endl;
529 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
532 map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
533 if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
534 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
541 if (countfile != "") { //create countfile with group info included
542 CountTable* ct = new CountTable();
543 ct->readTable(trimCountFile, true, false);
544 map<string, int> justTrimmedNames = ct->getNameMap();
548 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
549 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
550 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
551 newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
552 map<string, string>::iterator it2 = groupMap.find(itNames->first);
553 if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
554 else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
556 newCt.printTable(trimCountFile);
560 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
562 //output group counts
563 m->mothurOutEndLine();
565 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
566 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
567 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
569 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
571 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
573 //set fasta file as new current fastafile
575 itTypes = outputTypes.find("fasta");
576 if (itTypes != outputTypes.end()) {
577 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
580 itTypes = outputTypes.find("name");
581 if (itTypes != outputTypes.end()) {
582 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
585 itTypes = outputTypes.find("qfile");
586 if (itTypes != outputTypes.end()) {
587 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
590 itTypes = outputTypes.find("group");
591 if (itTypes != outputTypes.end()) {
592 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
595 itTypes = outputTypes.find("count");
596 if (itTypes != outputTypes.end()) {
597 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
600 m->mothurOutEndLine();
601 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
602 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
603 m->mothurOutEndLine();
608 catch(exception& e) {
609 m->errorOut(e, "TrimSeqsCommand", "execute");
614 /**************************************************************************************/
615 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
619 ofstream trimFASTAFile;
620 m->openOutputFile(trimFileName, trimFASTAFile);
622 ofstream scrapFASTAFile;
623 m->openOutputFile(scrapFileName, scrapFASTAFile);
625 ofstream trimQualFile;
626 ofstream scrapQualFile;
628 m->openOutputFile(trimQFileName, trimQualFile);
629 m->openOutputFile(scrapQFileName, scrapQualFile);
632 ofstream trimNameFile;
633 ofstream scrapNameFile;
635 m->openOutputFile(trimNFileName, trimNameFile);
636 m->openOutputFile(scrapNFileName, scrapNameFile);
639 ofstream trimCountFile;
640 ofstream scrapCountFile;
642 m->openOutputFile(trimCFileName, trimCountFile);
643 m->openOutputFile(scrapCFileName, scrapCountFile);
644 if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
647 ofstream outGroupsFile;
648 if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
650 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
651 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
652 if (fastaFileNames[i][j] != "") {
654 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
656 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
660 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
668 m->openInputFile(filename, inFASTA);
669 inFASTA.seekg(line.start);
672 if(qFileName != "") {
673 m->openInputFile(qFileName, qFile);
674 qFile.seekg(qline.start);
679 TrimOligos* trimOligos = NULL;
680 if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); }
681 else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); }
683 TrimOligos* rtrimOligos = NULL;
685 rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
690 if (m->control_pressed) {
691 delete trimOligos; if (reorient) { delete rtrimOligos; }
692 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
693 if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
694 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
695 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
696 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
697 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
701 string trashCode = "";
702 int currentSeqsDiffs = 0;
704 Sequence currSeq(inFASTA); m->gobble(inFASTA);
705 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
706 Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
708 QualityScores currQual; QualityScores savedQual;
710 currQual = QualityScores(qFile); m->gobble(qFile);
711 savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
712 //cout << currQual.getName() << endl;
715 string origSeq = currSeq.getUnaligned();
718 int barcodeIndex = 0;
722 success = trimOligos->stripLinker(currSeq, currQual);
723 if(success > ldiffs) { trashCode += 'k'; }
724 else{ currentSeqsDiffs += success; }
728 if(numBarcodes != 0){
729 success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
730 if(success > bdiffs) {
733 else{ currentSeqsDiffs += success; }
735 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
737 success = trimOligos->stripSpacer(currSeq, currQual);
738 if(success > sdiffs) { trashCode += 's'; }
739 else{ currentSeqsDiffs += success; }
743 if(numFPrimers != 0){
744 success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
745 if(success > pdiffs) {
748 else{ currentSeqsDiffs += success; }
751 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
753 if(numRPrimers != 0){
754 success = trimOligos->stripReverse(currSeq, currQual);
755 if(!success) { trashCode += 'r'; }
758 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
760 string thisTrashCode = "";
761 int thisCurrentSeqsDiffs = 0;
763 int thisBarcodeIndex = 0;
764 int thisPrimerIndex = 0;
765 //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
766 if(numBarcodes != 0){
767 thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
768 if(thisSuccess > bdiffs) { thisTrashCode += "b"; }
769 else{ thisCurrentSeqsDiffs += thisSuccess; }
771 //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
772 if(numFPrimers != 0){
773 thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
774 if(thisSuccess > pdiffs) { thisTrashCode += "f"; }
775 else{ thisCurrentSeqsDiffs += thisSuccess; }
778 if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
780 if (thisTrashCode == "") {
781 trashCode = thisTrashCode;
782 success = thisSuccess;
783 currentSeqsDiffs = thisCurrentSeqsDiffs;
784 barcodeIndex = thisBarcodeIndex;
785 primerIndex = thisPrimerIndex;
786 savedSeq.reverseComplement();
787 currSeq.setAligned(savedSeq.getAligned());
789 savedQual.flipQScores();
790 currQual.setScores(savedQual.getScores());
792 }else { trashCode += "(" + thisTrashCode + ")"; }
796 success = keepFirstTrim(currSeq, currQual);
800 success = removeLastTrim(currSeq, currQual);
801 if(!success) { trashCode += 'l'; }
806 int origLength = currSeq.getNumBases();
808 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
809 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage, logtransform); }
810 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform); }
811 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform); }
812 else { success = 1; }
814 //you don't want to trim, if it fails above then scrap it
815 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
817 if(!success) { trashCode += 'q'; }
820 if(minLength > 0 || maxLength > 0){
821 success = cullLength(currSeq);
822 if(!success) { trashCode += 'l'; }
825 success = cullHomoP(currSeq);
826 if(!success) { trashCode += 'h'; }
829 success = cullAmbigs(currSeq);
830 if(!success) { trashCode += 'n'; }
833 if(flip){ // should go last
834 currSeq.reverseComplement();
836 currQual.flipQScores();
840 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
842 if(trashCode.length() == 0){
843 string thisGroup = "";
844 if (createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); }
846 int pos = thisGroup.find("ignore");
847 if (pos == string::npos) {
848 currSeq.setAligned(currSeq.getUnaligned());
849 currSeq.printSequence(trimFASTAFile);
852 currQual.printQScores(trimQualFile);
857 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
858 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
859 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
862 int numRedundants = 0;
863 if (countfile != "") {
864 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
865 if (itCount != nameCount.end()) {
866 trimCountFile << itCount->first << '\t' << itCount->second << endl;
867 numRedundants = itCount->second-1;
868 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
872 if(numBarcodes != 0){
874 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
876 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
877 else { groupMap[currSeq.getName()] = thisGroup; }
879 if (nameFile != "") {
880 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
881 if (itName != nameMap.end()) {
882 vector<string> thisSeqsNames;
883 m->splitAtChar(itName->second, thisSeqsNames, ',');
884 numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
885 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
886 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
888 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
891 map<string, int>::iterator it = groupCounts.find(thisGroup);
892 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
893 else { groupCounts[it->first] += (1 + numRedundants); }
900 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
901 currSeq.printSequence(output);
905 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
906 currQual.printQScores(output);
911 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
912 if (itName != nameMap.end()) {
913 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
914 output << itName->first << '\t' << itName->second << endl;
916 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
922 if(nameFile != ""){ //needs to be before the currSeq name is changed
923 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
924 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
925 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
927 if (countfile != "") {
928 map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
929 if (itCount != nameCount.end()) {
930 trimCountFile << itCount->first << '\t' << itCount->second << endl;
931 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
934 currSeq.setName(currSeq.getName() + '|' + trashCode);
935 currSeq.setUnaligned(origSeq);
936 currSeq.setAligned(origSeq);
937 currSeq.printSequence(scrapFASTAFile);
939 currQual.printQScores(scrapQualFile);
945 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
946 unsigned long long pos = inFASTA.tellg();
947 if ((pos == -1) || (pos >= line.end)) { break; }
950 if (inFASTA.eof()) { break; }
954 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
958 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
961 if (reorient) { delete rtrimOligos; }
963 trimFASTAFile.close();
964 scrapFASTAFile.close();
965 if (createGroup) { outGroupsFile.close(); }
966 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
967 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
968 if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
972 catch(exception& e) {
973 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
978 /**************************************************************************************************/
980 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
987 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
988 //loop through and create all the processes you want
989 while (process != processors) {
993 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
997 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
998 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
999 vector<vector<string> > tempNameFileNames = nameFileNames;
1004 for(int i=0;i<tempFASTAFileNames.size();i++){
1005 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1006 if (tempFASTAFileNames[i][j] != "") {
1007 tempFASTAFileNames[i][j] += m->mothurGetpid(process) + ".temp";
1008 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1010 if(qFileName != ""){
1011 tempPrimerQualFileNames[i][j] += m->mothurGetpid(process) + ".temp";
1012 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1015 tempNameFileNames[i][j] += m->mothurGetpid(process) + ".temp";
1016 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1023 driverCreateTrim(filename,
1025 (trimFASTAFileName + m->mothurGetpid(process) + ".temp"),
1026 (scrapFASTAFileName + m->mothurGetpid(process) + ".temp"),
1027 (trimQualFileName + m->mothurGetpid(process) + ".temp"),
1028 (scrapQualFileName + m->mothurGetpid(process) + ".temp"),
1029 (trimNameFileName + m->mothurGetpid(process) + ".temp"),
1030 (scrapNameFileName + m->mothurGetpid(process) + ".temp"),
1031 (trimCountFileName + m->mothurGetpid(process) + ".temp"),
1032 (scrapCountFileName + m->mothurGetpid(process) + ".temp"),
1033 (groupFile + m->mothurGetpid(process) + ".temp"),
1035 tempPrimerQualFileNames,
1040 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + m->mothurGetpid(process) + '\n'); }
1042 //pass groupCounts to parent
1045 string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
1046 m->openOutputFile(tempFile, out);
1048 out << groupCounts.size() << endl;
1050 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1051 out << it->first << '\t' << it->second << endl;
1054 out << groupMap.size() << endl;
1055 for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1056 out << it->first << '\t' << it->second << endl;
1062 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1063 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1070 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1071 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1072 if(qFileName != ""){
1073 m->openOutputFile(trimQualFileName, temp); temp.close();
1074 m->openOutputFile(scrapQualFileName, temp); temp.close();
1076 if (nameFile != "") {
1077 m->openOutputFile(trimNameFileName, temp); temp.close();
1078 m->openOutputFile(scrapNameFileName, temp); temp.close();
1080 if (countfile != "") {
1081 m->openOutputFile(trimCountFileName, temp); temp.close();
1082 m->openOutputFile(scrapCountFileName, temp); temp.close();
1085 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1087 //force parent to wait until all the processes are done
1088 for (int i=0;i<processIDS.size();i++) {
1089 int temp = processIDS[i];
1093 //////////////////////////////////////////////////////////////////////////////////////////////////////
1094 //Windows version shared memory, so be careful when passing variables through the trimData struct.
1095 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1096 //////////////////////////////////////////////////////////////////////////////////////////////////////
1098 vector<trimData*> pDataArray;
1099 DWORD dwThreadIdArray[processors-1];
1100 HANDLE hThreadArray[processors-1];
1102 //Create processor worker threads.
1103 for( int h=0; h<processors-1; h++){
1105 string extension = "";
1106 if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1107 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1108 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1109 vector<vector<string> > tempNameFileNames = nameFileNames;
1114 for(int i=0;i<tempFASTAFileNames.size();i++){
1115 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1116 if (tempFASTAFileNames[i][j] != "") {
1117 tempFASTAFileNames[i][j] += extension;
1118 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1120 if(qFileName != ""){
1121 tempPrimerQualFileNames[i][j] += extension;
1122 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1125 tempNameFileNames[i][j] += extension;
1126 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1134 trimData* tempTrim = new trimData(filename,
1135 qFileName, nameFile, countfile,
1136 (trimFASTAFileName+extension),
1137 (scrapFASTAFileName+extension),
1138 (trimQualFileName+extension),
1139 (scrapQualFileName+extension),
1140 (trimNameFileName+extension),
1141 (scrapNameFileName+extension),
1142 (trimCountFileName+extension),
1143 (scrapCountFileName+extension),
1144 (groupFile+extension),
1146 tempPrimerQualFileNames,
1148 lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1149 pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, oligoFile,
1150 createGroup, allFiles, keepforward, keepFirst, removeLast,
1151 qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform,
1152 minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1153 pDataArray.push_back(tempTrim);
1155 hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
1160 m->openOutputFile(trimFASTAFileName, temp); temp.close();
1161 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
1162 if(qFileName != ""){
1163 m->openOutputFile(trimQualFileName, temp); temp.close();
1164 m->openOutputFile(scrapQualFileName, temp); temp.close();
1166 if (nameFile != "") {
1167 m->openOutputFile(trimNameFileName, temp); temp.close();
1168 m->openOutputFile(scrapNameFileName, temp); temp.close();
1170 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1171 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1172 vector<vector<string> > tempNameFileNames = nameFileNames;
1175 string extension = toString(processors-1) + ".temp";
1176 for(int i=0;i<tempFASTAFileNames.size();i++){
1177 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1178 if (tempFASTAFileNames[i][j] != "") {
1179 tempFASTAFileNames[i][j] += extension;
1180 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
1182 if(qFileName != ""){
1183 tempPrimerQualFileNames[i][j] += extension;
1184 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
1187 tempNameFileNames[i][j] += extension;
1188 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
1195 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1196 processIDS.push_back(processors-1);
1199 //Wait until all threads have terminated.
1200 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1202 //Close all thread handles and free memory allocations.
1203 for(int i=0; i < pDataArray.size(); i++){
1204 if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1205 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1207 for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1208 map<string, int>::iterator it2 = groupCounts.find(it->first);
1209 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1210 else { groupCounts[it->first] += it->second; }
1212 for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1213 map<string, string>::iterator it2 = groupMap.find(it->first);
1214 if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
1215 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1217 CloseHandle(hThreadArray[i]);
1218 delete pDataArray[i];
1225 for(int i=0;i<processIDS.size();i++){
1227 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1229 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1230 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1231 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1232 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1234 if(qFileName != ""){
1235 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1236 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1237 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1238 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1242 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1243 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1244 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1245 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1248 if(countfile != ""){
1249 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1250 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1251 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1252 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1255 if((createGroup)&&(countfile == "")){
1256 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1257 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1262 for(int j=0;j<fastaFileNames.size();j++){
1263 for(int k=0;k<fastaFileNames[j].size();k++){
1264 if (fastaFileNames[j][k] != "") {
1265 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1266 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1268 if(qFileName != ""){
1269 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1270 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1274 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1275 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1282 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1285 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
1286 m->openInputFile(tempFile, in);
1290 in >> tempNum; m->gobble(in);
1293 for (int i = 0; i < tempNum; i++) {
1295 in >> group >> groupNum; m->gobble(in);
1297 map<string, int>::iterator it = groupCounts.find(group);
1298 if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
1299 else { groupCounts[it->first] += groupNum; }
1302 in >> tempNum; m->gobble(in);
1304 for (int i = 0; i < tempNum; i++) {
1305 string group, seqName;
1306 in >> seqName >> group; m->gobble(in);
1308 map<string, string>::iterator it = groupMap.find(seqName);
1309 if (it == groupMap.end()) { groupMap[seqName] = group; }
1310 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
1314 in.close(); m->mothurRemove(tempFile);
1321 catch(exception& e) {
1322 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1327 /**************************************************************************************************/
1329 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1332 vector<unsigned long long> fastaFilePos;
1333 vector<unsigned long long> qfileFilePos;
1335 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1336 //set file positions for fasta file
1337 fastaFilePos = m->divideFile(filename, processors);
1339 //get name of first sequence in each chunk
1340 map<string, int> firstSeqNames;
1341 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1343 m->openInputFile(filename, in);
1344 in.seekg(fastaFilePos[i]);
1347 firstSeqNames[temp.getName()] = i;
1352 if(qfilename != "") {
1353 //seach for filePos of each first name in the qfile and save in qfileFilePos
1355 m->openInputFile(qfilename, inQual);
1358 while(!inQual.eof()){
1359 input = m->getline(inQual);
1361 if (input.length() != 0) {
1362 if(input[0] == '>'){ //this is a sequence name line
1363 istringstream nameStream(input);
1365 string sname = ""; nameStream >> sname;
1366 sname = sname.substr(1);
1368 m->checkName(sname);
1370 map<string, int>::iterator it = firstSeqNames.find(sname);
1372 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1373 unsigned long long pos = inQual.tellg();
1374 qfileFilePos.push_back(pos - input.length() - 1);
1375 firstSeqNames.erase(it);
1380 if (firstSeqNames.size() == 0) { break; }
1385 if (firstSeqNames.size() != 0) {
1386 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1387 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1393 //get last file position of qfile
1395 unsigned long long size;
1397 //get num bytes in file
1398 pFile = fopen (qfilename.c_str(),"rb");
1399 if (pFile==NULL) perror ("Error opening file");
1401 fseek (pFile, 0, SEEK_END);
1406 qfileFilePos.push_back(size);
1409 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1410 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1411 lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1412 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
1414 if(qfilename == "") { qLines = lines; } //files with duds
1420 if (processors == 1) { //save time
1421 //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1422 //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1423 lines.push_back(linePair(0, 1000));
1424 if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
1426 int numFastaSeqs = 0;
1427 fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
1428 if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1430 if (qfilename != "") {
1431 int numQualSeqs = 0;
1432 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
1434 if (numFastaSeqs != numQualSeqs) {
1435 m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
1439 //figure out how many sequences you have to process
1440 int numSeqsPerProcessor = numFastaSeqs / processors;
1441 for (int i = 0; i < processors; i++) {
1442 int startIndex = i * numSeqsPerProcessor;
1443 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
1444 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1445 if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1448 if(qfilename == "") { qLines = lines; } //files with duds
1453 catch(exception& e) {
1454 m->errorOut(e, "TrimSeqsCommand", "setLines");
1459 //***************************************************************************************************************
1461 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames, map<string, string>& fastaFile2Group){
1464 bool allBlank = false;
1465 oligos.read(oligoFile);
1467 if (m->control_pressed) { return false; } //error in reading oligos
1469 if (oligos.hasPairedBarcodes()) {
1470 pairedOligos = true;
1471 numFPrimers = oligos.getPairedPrimers().size();
1472 numBarcodes = oligos.getPairedBarcodes().size();
1474 pairedOligos = false;
1475 numFPrimers = oligos.getPrimers().size();
1476 numBarcodes = oligos.getBarcodes().size();
1479 numLinkers = oligos.getLinkers().size();
1480 numSpacers = oligos.getSpacers().size();
1481 numRPrimers = oligos.getReversePrimers().size();
1483 vector<string> groupNames = oligos.getGroupNames();
1484 if (groupNames.size() == 0) { allFiles = 0; allBlank = true; }
1487 fastaFileNames.resize(oligos.getBarcodeNames().size());
1488 for(int i=0;i<fastaFileNames.size();i++){
1489 for(int j=0;j<oligos.getPrimerNames().size();j++){ fastaFileNames[i].push_back(""); }
1492 if(qFileName != "") { qualFileNames = fastaFileNames; }
1493 if(nameFile != "") { nameFileNames = fastaFileNames; }
1497 set<string> uniqueNames; //used to cleanup outputFileNames
1499 map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
1500 map<int, oligosPair> primers = oligos.getPairedPrimers();
1501 for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1502 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1504 string primerName = oligos.getPrimerName(itPrimer->first);
1505 string barcodeName = oligos.getBarcodeName(itBar->first);
1507 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1508 else if ((primerName == "") && (barcodeName == "")) { } //do nothing
1510 string comboGroupName = "";
1511 string fastaFileName = "";
1512 string qualFileName = "";
1513 string nameFileName = "";
1514 string countFileName = "";
1516 if(primerName == ""){
1517 comboGroupName = barcodeName;
1519 if(barcodeName == ""){
1520 comboGroupName = primerName;
1523 comboGroupName = barcodeName + "." + primerName;
1529 map<string, string> variables;
1530 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1531 variables["[tag]"] = comboGroupName;
1532 fastaFileName = getOutputFileName("fasta", variables);
1533 if (uniqueNames.count(fastaFileName) == 0) {
1534 outputNames.push_back(fastaFileName);
1535 outputTypes["fasta"].push_back(fastaFileName);
1536 uniqueNames.insert(fastaFileName);
1537 fastaFile2Group[fastaFileName] = comboGroupName;
1540 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1541 m->openOutputFile(fastaFileName, temp); temp.close();
1543 if(qFileName != ""){
1544 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1545 qualFileName = getOutputFileName("qfile", variables);
1546 if (uniqueNames.count(qualFileName) == 0) {
1547 outputNames.push_back(qualFileName);
1548 outputTypes["qfile"].push_back(qualFileName);
1551 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1552 m->openOutputFile(qualFileName, temp); temp.close();
1556 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1557 nameFileName = getOutputFileName("name", variables);
1558 if (uniqueNames.count(nameFileName) == 0) {
1559 outputNames.push_back(nameFileName);
1560 outputTypes["name"].push_back(nameFileName);
1563 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1564 m->openOutputFile(nameFileName, temp); temp.close();
1570 map<string, int> barcodes = oligos.getBarcodes() ;
1571 map<string, int> primers = oligos.getPrimers();
1572 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1573 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1575 string primerName = oligos.getPrimerName(itPrimer->second);
1576 string barcodeName = oligos.getBarcodeName(itBar->second);
1578 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1579 else if ((primerName == "") && (barcodeName == "")) { } //do nothing
1581 string comboGroupName = "";
1582 string fastaFileName = "";
1583 string qualFileName = "";
1584 string nameFileName = "";
1585 string countFileName = "";
1587 if(primerName == ""){
1588 comboGroupName = barcodeName;
1590 if(barcodeName == ""){
1591 comboGroupName = primerName;
1594 comboGroupName = barcodeName + "." + primerName;
1601 map<string, string> variables;
1602 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1603 variables["[tag]"] = comboGroupName;
1604 fastaFileName = getOutputFileName("fasta", variables);
1605 if (uniqueNames.count(fastaFileName) == 0) {
1606 outputNames.push_back(fastaFileName);
1607 outputTypes["fasta"].push_back(fastaFileName);
1608 uniqueNames.insert(fastaFileName);
1609 fastaFile2Group[fastaFileName] = comboGroupName;
1612 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1613 m->openOutputFile(fastaFileName, temp); temp.close();
1615 if(qFileName != ""){
1616 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1617 qualFileName = getOutputFileName("qfile", variables);
1618 if (uniqueNames.count(qualFileName) == 0) {
1619 outputNames.push_back(qualFileName);
1620 outputTypes["qfile"].push_back(qualFileName);
1623 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1624 m->openOutputFile(qualFileName, temp); temp.close();
1628 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1629 nameFileName = getOutputFileName("name", variables);
1630 if (uniqueNames.count(nameFileName) == 0) {
1631 outputNames.push_back(nameFileName);
1632 outputTypes["name"].push_back(nameFileName);
1635 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1636 m->openOutputFile(nameFileName, temp); temp.close();
1648 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1655 /*ifstream inOligos;
1656 m->openInputFile(oligoFile, inOligos);
1660 string type, oligo, roligo, group;
1661 bool hasPrimer = false; bool hasPairedBarcodes = false;
1663 int indexPrimer = 0;
1664 int indexBarcode = 0;
1665 int indexPairedPrimer = 0;
1666 int indexPairedBarcode = 0;
1667 set<string> uniquePrimers;
1668 set<string> uniqueBarcodes;
1670 while(!inOligos.eof()){
1674 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1677 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1678 m->gobble(inOligos);
1681 m->gobble(inOligos);
1682 //make type case insensitive
1683 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1687 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1689 for(int i=0;i<oligo.length();i++){
1690 oligo[i] = toupper(oligo[i]);
1691 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1694 if(type == "FORWARD"){
1697 // get rest of line in case there is a primer name
1698 while (!inOligos.eof()) {
1699 char c = inOligos.get();
1700 if (c == 10 || c == 13 || c == -1){ break; }
1701 else if (c == 32 || c == 9){;} //space or tab
1702 else { group += c; }
1705 //check for repeat barcodes
1706 map<string, int>::iterator itPrime = primers.find(oligo);
1707 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1709 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1711 primers[oligo]=indexPrimer; indexPrimer++;
1712 primerNameVector.push_back(group);
1714 else if (type == "PRIMER"){
1715 m->gobble(inOligos);
1719 for(int i=0;i<roligo.length();i++){
1720 roligo[i] = toupper(roligo[i]);
1721 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1723 roligo = reverseOligo(roligo);
1727 // get rest of line in case there is a primer name
1728 while (!inOligos.eof()) {
1729 char c = inOligos.get();
1730 if (c == 10 || c == 13 || c == -1){ break; }
1731 else if (c == 32 || c == 9){;} //space or tab
1732 else { group += c; }
1735 oligosPair newPrimer(oligo, roligo);
1737 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1739 //check for repeat barcodes
1740 string tempPair = oligo+roligo;
1741 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1742 else { uniquePrimers.insert(tempPair); }
1744 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1746 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1747 primerNameVector.push_back(group);
1750 else if(type == "REVERSE"){
1751 //Sequence oligoRC("reverse", oligo);
1752 //oligoRC.reverseComplement();
1753 string oligoRC = reverseOligo(oligo);
1754 revPrimer.push_back(oligoRC);
1756 else if(type == "BARCODE"){
1759 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1760 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1763 while (!inOligos.eof()) {
1764 char c = inOligos.get();
1765 if (c == 10 || c == 13 || c == -1){ break; }
1766 else if (c == 32 || c == 9){;} //space or tab
1770 //then this is illumina data with 4 columns
1772 hasPairedBarcodes = true;
1773 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1776 for(int i=0;i<reverseBarcode.length();i++){
1777 reverseBarcode[i] = toupper(reverseBarcode[i]);
1778 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1781 reverseBarcode = reverseOligo(reverseBarcode);
1782 oligosPair newPair(oligo, reverseBarcode);
1784 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1786 //check for repeat barcodes
1787 string tempPair = oligo+reverseBarcode;
1788 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1789 else { uniqueBarcodes.insert(tempPair); }
1791 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1792 barcodeNameVector.push_back(group);
1794 //check for repeat barcodes
1795 map<string, int>::iterator itBar = barcodes.find(oligo);
1796 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1798 barcodes[oligo]=indexBarcode; indexBarcode++;
1799 barcodeNameVector.push_back(group);
1801 }else if(type == "LINKER"){
1802 linker.push_back(oligo);
1803 }else if(type == "SPACER"){
1804 spacer.push_back(oligo);
1806 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1808 m->gobble(inOligos);
1812 if (hasPairedBarcodes || hasPrimer) {
1813 pairedOligos = true;
1814 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1817 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1819 //add in potential combos
1820 if(barcodeNameVector.size() == 0){
1822 barcodeNameVector.push_back("");
1825 if(primerNameVector.size() == 0){
1827 primerNameVector.push_back("");
1830 fastaFileNames.resize(barcodeNameVector.size());
1831 for(int i=0;i<fastaFileNames.size();i++){
1832 fastaFileNames[i].assign(primerNameVector.size(), "");
1834 if(qFileName != "") { qualFileNames = fastaFileNames; }
1835 if(nameFile != "") { nameFileNames = fastaFileNames; }
1838 set<string> uniqueNames; //used to cleanup outputFileNames
1840 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1841 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1843 string primerName = primerNameVector[itPrimer->first];
1844 string barcodeName = barcodeNameVector[itBar->first];
1846 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1848 string comboGroupName = "";
1849 string fastaFileName = "";
1850 string qualFileName = "";
1851 string nameFileName = "";
1852 string countFileName = "";
1854 if(primerName == ""){
1855 comboGroupName = barcodeNameVector[itBar->first];
1858 if(barcodeName == ""){
1859 comboGroupName = primerNameVector[itPrimer->first];
1862 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1868 map<string, string> variables;
1869 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1870 variables["[tag]"] = comboGroupName;
1871 fastaFileName = getOutputFileName("fasta", variables);
1872 if (uniqueNames.count(fastaFileName) == 0) {
1873 outputNames.push_back(fastaFileName);
1874 outputTypes["fasta"].push_back(fastaFileName);
1875 uniqueNames.insert(fastaFileName);
1878 fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1879 m->openOutputFile(fastaFileName, temp); temp.close();
1881 if(qFileName != ""){
1882 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1883 qualFileName = getOutputFileName("qfile", variables);
1884 if (uniqueNames.count(qualFileName) == 0) {
1885 outputNames.push_back(qualFileName);
1886 outputTypes["qfile"].push_back(qualFileName);
1889 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1890 m->openOutputFile(qualFileName, temp); temp.close();
1894 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1895 nameFileName = getOutputFileName("name", variables);
1896 if (uniqueNames.count(nameFileName) == 0) {
1897 outputNames.push_back(nameFileName);
1898 outputTypes["name"].push_back(nameFileName);
1901 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1902 m->openOutputFile(nameFileName, temp); temp.close();
1908 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1909 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1911 string primerName = primerNameVector[itPrimer->second];
1912 string barcodeName = barcodeNameVector[itBar->second];
1914 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1916 string comboGroupName = "";
1917 string fastaFileName = "";
1918 string qualFileName = "";
1919 string nameFileName = "";
1920 string countFileName = "";
1922 if(primerName == ""){
1923 comboGroupName = barcodeNameVector[itBar->second];
1926 if(barcodeName == ""){
1927 comboGroupName = primerNameVector[itPrimer->second];
1930 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1936 map<string, string> variables;
1937 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1938 variables["[tag]"] = comboGroupName;
1939 fastaFileName = getOutputFileName("fasta", variables);
1940 if (uniqueNames.count(fastaFileName) == 0) {
1941 outputNames.push_back(fastaFileName);
1942 outputTypes["fasta"].push_back(fastaFileName);
1943 uniqueNames.insert(fastaFileName);
1946 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1947 m->openOutputFile(fastaFileName, temp); temp.close();
1949 if(qFileName != ""){
1950 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1951 qualFileName = getOutputFileName("qfile", variables);
1952 if (uniqueNames.count(qualFileName) == 0) {
1953 outputNames.push_back(qualFileName);
1954 outputTypes["qfile"].push_back(qualFileName);
1957 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1958 m->openOutputFile(qualFileName, temp); temp.close();
1962 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1963 nameFileName = getOutputFileName("name", variables);
1964 if (uniqueNames.count(nameFileName) == 0) {
1965 outputNames.push_back(nameFileName);
1966 outputTypes["name"].push_back(nameFileName);
1969 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1970 m->openOutputFile(nameFileName, temp); temp.close();
1981 catch(exception& e) {
1982 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1986 //***************************************************************************************************************
1988 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1991 if(qscores.getName() != ""){
1992 qscores.trimQScores(-1, keepFirst);
1995 // sequence.printSequence(cout);cout << endl;
1997 sequence.trim(keepFirst);
1999 // sequence.printSequence(cout);cout << endl << endl;;
2003 catch(exception& e) {
2004 m->errorOut(e, "keepFirstTrim", "countDiffs");
2010 //***************************************************************************************************************
2012 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
2016 int length = sequence.getNumBases() - removeLast;
2019 if(qscores.getName() != ""){
2020 qscores.trimQScores(-1, length);
2022 sequence.trim(length);
2031 catch(exception& e) {
2032 m->errorOut(e, "removeLastTrim", "countDiffs");
2038 //***************************************************************************************************************
2040 bool TrimSeqsCommand::cullLength(Sequence& seq){
2043 int length = seq.getNumBases();
2044 bool success = 0; //guilty until proven innocent
2046 if(length >= minLength && maxLength == 0) { success = 1; }
2047 else if(length >= minLength && length <= maxLength) { success = 1; }
2048 else { success = 0; }
2053 catch(exception& e) {
2054 m->errorOut(e, "TrimSeqsCommand", "cullLength");
2060 //***************************************************************************************************************
2062 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
2064 int longHomoP = seq.getLongHomoPolymer();
2065 bool success = 0; //guilty until proven innocent
2067 if(longHomoP <= maxHomoP){ success = 1; }
2068 else { success = 0; }
2072 catch(exception& e) {
2073 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
2079 //***************************************************************************************************************
2081 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
2083 int numNs = seq.getAmbigBases();
2084 bool success = 0; //guilty until proven innocent
2086 if(numNs <= maxAmbig) { success = 1; }
2087 else { success = 0; }
2091 catch(exception& e) {
2092 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
2097 //***************************************************************************************************************