5 // Created by Sarah Westcott on 3/14/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "pcrseqscommand.h"
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){
14 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18 CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
19 CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
20 CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
21 CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
22 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
25 CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "PcrSeqsCommand", "setParameters");
38 //**********************************************************************************************************************
39 string PcrSeqsCommand::getHelpString(){
41 string helpString = "";
42 helpString += "The pcr.seqs command reads a fasta file.\n";
43 helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
44 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
45 helpString += "The start parameter allows you to provide a starting position to trim to.\n";
46 helpString += "The end parameter allows you to provide a ending position to trim from.\n";
47 helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n";
48 helpString += "The processors parameter allows you to use multiple processors.\n";
49 helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
50 helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
56 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
62 //**********************************************************************************************************************
64 PcrSeqsCommand::PcrSeqsCommand(){
66 abort = true; calledHelp = true;
68 vector<string> tempOutNames;
69 outputTypes["fasta"] = tempOutNames;
70 outputTypes["taxonomy"] = tempOutNames;
71 outputTypes["group"] = tempOutNames;
72 outputTypes["name"] = tempOutNames;
73 outputTypes["accnos"] = tempOutNames;
76 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
80 //***************************************************************************************************************
82 PcrSeqsCommand::PcrSeqsCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 vector<string> myArray = setParameters();
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
97 ValidParameters validParameter;
98 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 //initialize outputTypes
106 vector<string> tempOutNames;
107 outputTypes["fasta"] = tempOutNames;
108 outputTypes["taxonomy"] = tempOutNames;
109 outputTypes["group"] = tempOutNames;
110 outputTypes["name"] = tempOutNames;
111 outputTypes["accnos"] = tempOutNames;
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
118 it = parameters.find("fasta");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["fasta"] = inputDir + it->second; }
126 it = parameters.find("oligos");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["oligos"] = inputDir + it->second; }
134 it = parameters.find("ecoli");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["ecoli"] = inputDir + it->second; }
142 it = parameters.find("taxonomy");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
150 it = parameters.find("name");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["name"] = inputDir + it->second; }
158 it = parameters.find("group");
159 //user has given a template file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["group"] = inputDir + it->second; }
169 //check for required parameters
170 fastafile = validParameter.validFile(parameters, "fasta", true);
171 if (fastafile == "not found") {
172 fastafile = m->getFastaFile();
173 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
174 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
175 }else if (fastafile == "not open") { fastafile = ""; abort = true; }
176 else { m->setFastaFile(fastafile); }
178 //if the user changes the output directory command factory will send this info to us in the output parameter
179 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
181 //check for optional parameter and set defaults
182 // ...at some point should added some additional type checking...
184 temp = validParameter.validFile(parameters, "keepprimer", false); if (temp == "not found") { temp = "f"; }
185 keepprimer = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "keepdots", false); if (temp == "not found") { temp = "t"; }
188 keepdots = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "oligos", true);
191 if (temp == "not found"){ oligosfile = ""; }
192 else if(temp == "not open"){ oligosfile = ""; abort = true; }
193 else { oligosfile = temp; m->setOligosFile(oligosfile); }
195 ecolifile = validParameter.validFile(parameters, "ecoli", true);
196 if (ecolifile == "not found"){ ecolifile = ""; }
197 else if(ecolifile == "not open"){ ecolifile = ""; abort = true; }
199 namefile = validParameter.validFile(parameters, "name", true);
200 if (namefile == "not found"){ namefile = ""; }
201 else if(namefile == "not open"){ namefile = ""; abort = true; }
202 else { m->setNameFile(namefile); }
204 groupfile = validParameter.validFile(parameters, "group", true);
205 if (groupfile == "not found"){ groupfile = ""; }
206 else if(groupfile == "not open"){ groupfile = ""; abort = true; }
207 else { m->setGroupFile(groupfile); }
209 taxfile = validParameter.validFile(parameters, "taxonomy", true);
210 if (taxfile == "not found"){ taxfile = ""; }
211 else if(taxfile == "not open"){ taxfile = ""; abort = true; }
212 else { m->setTaxonomyFile(taxfile); }
214 temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
215 m->mothurConvert(temp, start);
217 temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; }
218 m->mothurConvert(temp, end);
220 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
221 m->setProcessors(temp);
222 m->mothurConvert(temp, processors);
224 nomatch = validParameter.validFile(parameters, "nomatch", false); if (nomatch == "not found") { nomatch = "reject"; }
226 if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n"); abort = true; }
229 if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
230 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
233 if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
235 if ((ecolifile != "") && (start != -1) && (end != -1)) {
236 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend. When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
240 if ((oligosfile != "") && (ecolifile != "")) {
241 m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
244 //check to make sure you didn't forget the name file by mistake
245 if (namefile == "") {
246 vector<string> files; files.push_back(fastafile);
247 parser.getNameFile(files);
252 catch(exception& e) {
253 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
257 //***************************************************************************************************************
259 int PcrSeqsCommand::execute(){
262 if (abort == true) { if (calledHelp) { return 0; } return 2; }
264 int start = time(NULL);
266 string thisOutputDir = outputDir;
267 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
268 string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
269 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
271 string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
275 if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
276 if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; }
278 vector<unsigned long long> positions;
279 int numFastaSeqs = 0;
280 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
281 positions = m->divideFile(fastafile, processors);
282 for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
284 if (processors == 1) {
285 lines.push_back(linePair(0, 1000));
287 positions = m->setFilePosFasta(fastafile, numFastaSeqs);
288 if (positions.size() < processors) { processors = positions.size(); }
290 //figure out how many sequences you have to process
291 int numSeqsPerProcessor = numFastaSeqs / processors;
292 for (int i = 0; i < processors; i++) {
293 int startIndex = i * numSeqsPerProcessor;
294 if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
295 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
299 if (m->control_pressed) { return 0; }
301 set<string> badNames;
302 if(processors == 1) { numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]); }
303 else { numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames); }
305 if (m->control_pressed) { return 0; }
307 //don't write or keep if blank
308 if (badNames.size() != 0) { writeAccnos(badNames); }
309 if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile); }
310 else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
312 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
313 if (namefile != "") { readName(badNames); }
314 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
315 if (groupfile != "") { readGroup(badNames); }
316 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
317 if (taxfile != "") { readTax(badNames); }
318 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
320 m->mothurOutEndLine();
321 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
322 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
323 m->mothurOutEndLine();
324 m->mothurOutEndLine();
326 //set fasta file as new current fastafile
328 itTypes = outputTypes.find("fasta");
329 if (itTypes != outputTypes.end()) {
330 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
333 itTypes = outputTypes.find("name");
334 if (itTypes != outputTypes.end()) {
335 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
338 itTypes = outputTypes.find("group");
339 if (itTypes != outputTypes.end()) {
340 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
343 itTypes = outputTypes.find("accnos");
344 if (itTypes != outputTypes.end()) {
345 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
348 itTypes = outputTypes.find("taxonomy");
349 if (itTypes != outputTypes.end()) {
350 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
353 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
354 m->mothurOutEndLine();
360 catch(exception& e) {
361 m->errorOut(e, "PcrSeqsCommand", "execute");
365 /**************************************************************************************************/
366 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
369 vector<int> processIDS;
373 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
375 //loop through and create all the processes you want
376 while (process != processors) {
380 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
383 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
385 //pass numSeqs to parent
387 string tempFile = filename + toString(getpid()) + ".num.temp";
388 m->openOutputFile(tempFile, out);
389 out << num << '\t' << badSeqNames.size() << endl;
390 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
391 out << (*it) << endl;
397 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
398 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
403 num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
405 //force parent to wait until all the processes are done
406 for (int i=0;i<processIDS.size();i++) {
407 int temp = processIDS[i];
411 for (int i = 0; i < processIDS.size(); i++) {
413 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
414 m->openInputFile(tempFile, in);
415 int numBadNames = 0; string name = "";
416 if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
417 for (int j = 0; j < numBadNames; j++) {
418 in >> name; m->gobble(in);
419 badSeqNames.insert(name);
421 in.close(); m->mothurRemove(tempFile);
423 m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
424 m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
426 m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
427 m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
431 //////////////////////////////////////////////////////////////////////////////////////////////////////
432 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct.
433 //Above fork() will clone, so memory is separate, but that's not the case with windows,
434 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
435 //////////////////////////////////////////////////////////////////////////////////////////////////////
437 vector<pcrData*> pDataArray;
438 DWORD dwThreadIdArray[processors-1];
439 HANDLE hThreadArray[processors-1];
441 //Create processor worker threads.
442 for( int i=0; i<processors-1; i++ ){
444 string extension = "";
445 if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
447 // Allocate memory for thread data.
448 pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
449 pDataArray.push_back(tempPcr);
451 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
452 hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
456 num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
457 processIDS.push_back(processors-1);
459 //Wait until all threads have terminated.
460 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
462 //Close all thread handles and free memory allocations.
463 for(int i=0; i < pDataArray.size(); i++){
464 num += pDataArray[i]->count;
465 for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); }
466 CloseHandle(hThreadArray[i]);
467 delete pDataArray[i];
470 for (int i = 0; i < processIDS.size(); i++) {
471 m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
472 m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
474 m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
475 m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
483 catch(exception& e) {
484 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
489 //**********************************************************************************************************************
490 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
493 m->openOutputFile(goodFasta, goodFile);
496 m->openOutputFile(badFasta, badFile);
499 m->openInputFile(filename, inFASTA);
501 inFASTA.seekg(filePos.start);
509 if (m->control_pressed) { break; }
511 Sequence currSeq(inFASTA); m->gobble(inFASTA);
513 string trashCode = "";
514 if (currSeq.getName() != "") {
517 if (oligosfile != "") {
518 map<int, int> mapAligned;
519 bool aligned = isAligned(currSeq.getAligned(), mapAligned);
522 if (primers.size() != 0) {
523 int primerStart = 0; int primerEnd = 0;
524 bool good = findForward(currSeq, primerStart, primerEnd);
526 if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
531 if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
532 else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
535 if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
536 else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
539 if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
540 else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
545 //process reverse primers
546 if (revPrimer.size() != 0) {
547 int primerStart = 0; int primerEnd = 0;
548 bool good = findReverse(currSeq, primerStart, primerEnd);
549 if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
554 if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
555 else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
558 if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
559 else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
563 if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); }
564 else { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd)); }
568 }else if (ecolifile != "") {
569 //make sure the seqs are aligned
570 lengths.insert(currSeq.getAligned().length());
571 if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
572 else if (currSeq.getAligned().length() != length) {
573 m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break;
576 currSeq.filterToPos(start);
577 currSeq.filterFromPos(end);
579 string seqString = currSeq.getAligned().substr(0, end);
580 seqString = seqString.substr(start);
581 currSeq.setAligned(seqString);
584 }else{ //using start and end to trim
585 //make sure the seqs are aligned
586 lengths.insert(currSeq.getAligned().length());
587 if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
590 if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
592 if (keepdots) { currSeq.filterFromPos(end); }
594 string seqString = currSeq.getAligned().substr(0, end);
595 currSeq.setAligned(seqString);
600 if (keepdots) { currSeq.filterToPos(start); }
602 string seqString = currSeq.getAligned().substr(start);
603 currSeq.setAligned(seqString);
609 //trimming removed all bases
610 if (currSeq.getUnaligned() == "") { goodSeq = false; }
612 if(goodSeq == 1) { currSeq.printSequence(goodFile); }
614 badSeqNames.insert(currSeq.getName());
615 currSeq.setName(currSeq.getName() + '|' + trashCode);
616 currSeq.printSequence(badFile);
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622 unsigned long long pos = inFASTA.tellg();
623 if ((pos == -1) || (pos >= filePos.end)) { break; }
625 if (inFASTA.eof()) { break; }
629 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
632 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
640 catch(exception& e) {
641 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
645 //********************************************************************/
646 bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
649 string rawSequence = seq.getUnaligned();
651 for(int j=0;j<primers.size();j++){
652 string oligo = primers[j];
654 if(rawSequence.length() < oligo.length()) { break; }
657 int olength = oligo.length();
658 for (int j = 0; j < rawSequence.length()-olength; j++){
659 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
660 string rawChunk = rawSequence.substr(j, olength);
661 if(compareDNASeq(oligo, rawChunk)) {
663 primerEnd = primerStart + olength;
670 primerStart = 0; primerEnd = 0;
674 catch(exception& e) {
675 m->errorOut(e, "TrimOligos", "stripForward");
679 //******************************************************************/
680 bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
683 string rawSequence = seq.getUnaligned();
685 for(int i=0;i<revPrimer.size();i++){
686 string oligo = revPrimer[i];
687 if(rawSequence.length() < oligo.length()) { break; }
690 int olength = oligo.length();
691 for (int j = rawSequence.length()-olength; j >= 0; j--){
692 if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
693 string rawChunk = rawSequence.substr(j, olength);
695 if(compareDNASeq(oligo, rawChunk)) {
697 primerEnd = primerStart + olength;
704 primerStart = 0; primerEnd = 0;
707 catch(exception& e) {
708 m->errorOut(e, "PcrSeqsCommand", "findReverse");
712 //********************************************************************/
713 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
715 bool isAligned = false;
718 for (int i = 0; i < seq.length(); i++) {
719 if (!isalpha(seq[i])) { isAligned = true; }
720 else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
721 } //ie. the 3rd base may be at spot 10 in the alignment
722 //later when we trim we want to trim from spot 10.
725 catch(exception& e) {
726 m->errorOut(e, "PcrSeqsCommand", "isAligned");
730 //********************************************************************/
731 string PcrSeqsCommand::reverseOligo(string oligo){
735 for(int i=oligo.length()-1;i>=0;i--){
737 if(oligo[i] == 'A') { reverse += 'T'; }
738 else if(oligo[i] == 'T'){ reverse += 'A'; }
739 else if(oligo[i] == 'U'){ reverse += 'A'; }
741 else if(oligo[i] == 'G'){ reverse += 'C'; }
742 else if(oligo[i] == 'C'){ reverse += 'G'; }
744 else if(oligo[i] == 'R'){ reverse += 'Y'; }
745 else if(oligo[i] == 'Y'){ reverse += 'R'; }
747 else if(oligo[i] == 'M'){ reverse += 'K'; }
748 else if(oligo[i] == 'K'){ reverse += 'M'; }
750 else if(oligo[i] == 'W'){ reverse += 'W'; }
751 else if(oligo[i] == 'S'){ reverse += 'S'; }
753 else if(oligo[i] == 'B'){ reverse += 'V'; }
754 else if(oligo[i] == 'V'){ reverse += 'B'; }
756 else if(oligo[i] == 'D'){ reverse += 'H'; }
757 else if(oligo[i] == 'H'){ reverse += 'D'; }
759 else { reverse += 'N'; }
765 catch(exception& e) {
766 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
771 //***************************************************************************************************************
772 bool PcrSeqsCommand::readOligos(){
775 m->openInputFile(oligosfile, inOligos);
777 string type, oligo, group;
779 while(!inOligos.eof()){
783 if(type[0] == '#'){ //ignore
784 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
788 //make type case insensitive
789 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
793 for(int i=0;i<oligo.length();i++){
794 oligo[i] = toupper(oligo[i]);
795 if(oligo[i] == 'U') { oligo[i] = 'T'; }
798 if(type == "FORWARD"){
799 // get rest of line in case there is a primer name
800 while (!inOligos.eof()) {
801 char c = inOligos.get();
802 if (c == 10 || c == 13){ break; }
803 else if (c == 32 || c == 9){;} //space or tab
805 primers.push_back(oligo);
806 }else if(type == "REVERSE"){
807 string oligoRC = reverseOligo(oligo);
808 revPrimer.push_back(oligoRC);
809 //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
810 }else if(type == "BARCODE"){
812 }else if((type == "LINKER")||(type == "SPACER")) {;}
813 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
819 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
820 m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers. Please correct."); m->mothurOutEndLine();
821 m->control_pressed = true;
827 }catch(exception& e) {
828 m->errorOut(e, "PcrSeqsCommand", "readOligos");
832 //***************************************************************************************************************
833 bool PcrSeqsCommand::readEcoli(){
836 m->openInputFile(ecolifile, in);
841 length = ecoli.getAligned().length();
842 start = ecoli.getStartPos();
843 end = ecoli.getEndPos();
844 }else { in.close(); m->control_pressed = true; return false; }
849 catch(exception& e) {
850 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
855 //***************************************************************************************************************
856 int PcrSeqsCommand::writeAccnos(set<string> badNames){
858 string thisOutputDir = outputDir;
859 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
860 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
861 outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
864 m->openOutputFile(outputFileName, out);
866 for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
867 if (m->control_pressed) { break; }
868 out << (*it) << endl;
874 catch(exception& e) {
875 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
880 //******************************************************************/
881 bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
884 int length = oligo.length();
886 for(int i=0;i<length;i++){
888 if(oligo[i] != seq[i]){
889 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
890 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
891 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
892 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
893 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
894 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
895 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
896 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
897 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
898 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
899 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
900 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
902 if(success == 0) { break; }
911 catch(exception& e) {
912 m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
917 //***************************************************************************************************************
918 int PcrSeqsCommand::readName(set<string>& names){
920 string thisOutputDir = outputDir;
921 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
922 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
925 m->openOutputFile(outputFileName, out);
928 m->openInputFile(namefile, in);
929 string name, firstCol, secondCol;
931 bool wroteSomething = false;
932 int removedCount = 0;
935 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
937 in >> firstCol; m->gobble(in);
940 string savedSecond = secondCol;
941 vector<string> parsedNames;
942 m->splitAtComma(secondCol, parsedNames);
944 vector<string> validSecond; validSecond.clear();
945 for (int i = 0; i < parsedNames.size(); i++) {
946 if (names.count(parsedNames[i]) == 0) {
947 validSecond.push_back(parsedNames[i]);
951 if (validSecond.size() != parsedNames.size()) { //we want to get rid of someone, so get rid of everyone
952 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
953 removedCount += parsedNames.size();
955 out << firstCol << '\t' << savedSecond << endl;
956 wroteSomething = true;
963 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
964 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
966 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
970 catch(exception& e) {
971 m->errorOut(e, "PcrSeqsCommand", "readName");
975 //**********************************************************************************************************************
976 int PcrSeqsCommand::readGroup(set<string> names){
978 string thisOutputDir = outputDir;
979 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
980 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
983 m->openOutputFile(outputFileName, out);
986 m->openInputFile(groupfile, in);
989 bool wroteSomething = false;
990 int removedCount = 0;
993 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
995 in >> name; //read from first column
996 in >> group; //read from second column
998 //if this name is in the accnos file
999 if (names.count(name) == 0) {
1000 wroteSomething = true;
1001 out << name << '\t' << group << endl;
1002 }else { removedCount++; }
1009 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1010 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1012 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1017 catch(exception& e) {
1018 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1022 //**********************************************************************************************************************
1023 int PcrSeqsCommand::readTax(set<string> names){
1025 string thisOutputDir = outputDir;
1026 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
1027 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
1029 m->openOutputFile(outputFileName, out);
1032 m->openInputFile(taxfile, in);
1035 bool wroteSomething = false;
1036 int removedCount = 0;
1039 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1041 in >> name; //read from first column
1042 in >> tax; //read from second column
1044 //if this name is in the accnos file
1045 if (names.count(name) == 0) {
1046 wroteSomething = true;
1047 out << name << '\t' << tax << endl;
1048 }else { removedCount++; }
1055 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1056 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1058 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1062 catch(exception& e) {
1063 m->errorOut(e, "PcrSeqsCommand", "readTax");
1067 /**************************************************************************************/