1 #ifndef Mothur_pcrseqscommand_h
2 #define Mothur_pcrseqscommand_h
8 // Created by Sarah Westcott on 3/14/12.
9 // Copyright (c) 2012 Schloss Lab. All rights reserved.
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "trimoligos.h"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18 #include "counttable.h"
21 class PcrSeqsCommand : public Command {
23 PcrSeqsCommand(string);
27 vector<string> setParameters();
28 string getCommandName() { return "pcr.seqs"; }
29 string getCommandCategory() { return "Sequence Processing"; }
31 string getHelpString();
32 string getOutputPattern(string);
33 string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
34 string getDescription() { return "pcr.seqs"; }
37 void help() { m->mothurOut(getHelpString()); }
42 unsigned long long start;
43 unsigned long long end;
44 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
48 vector<linePair> lines;
49 bool abort, keepprimer, keepdots, fileAligned, pairedOligos;
50 string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
51 int start, end, processors, length, pdiffs, numFPrimers, numRPrimers;
54 vector<string> outputNames;
56 int writeAccnos(set<string>);
57 int readName(set<string>&);
58 int readGroup(set<string>);
59 int readTax(set<string>);
60 int readCount(set<string>);
63 int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
64 int createProcesses(string, string, string, set<string>&);
65 bool isAligned(string, map<int, int>&);
66 int adjustDots(string, string, int, int);
70 /**************************************************************************************************/
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
76 string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName;
77 unsigned long long fstart;
78 unsigned long long fend;
79 int count, start, end, length, pdiffs, pstart, pend;
81 set<string> badSeqNames;
82 bool keepprimer, keepdots, fileAligned, adjustNeeded;
85 pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
104 adjustNeeded = false;
109 /**************************************************************************************************/
110 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
112 static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
114 pDataArray = (pcrData*)lpParam;
118 pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
121 pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
123 ofstream locationsFile;
124 pDataArray->m->openOutputFile(pDataArray->locationsName, locationsFile);
127 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
129 //print header if you are process 0
130 if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
132 }else { //this accounts for the difference in line endings.
133 inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA);
137 //pdiffs, bdiffs, primers, barcodes, revPrimers
138 map<string, int> faked;
139 set<int> locations; //locations = beginning locations
141 Oligos oligos(pDataArray->oligosfile);
142 int numFPrimers, numRPrimers;
143 map<string, int> primers;
144 map<string, int> barcodes; //not used
145 vector<string> revPrimer;
146 if (oligos.hasPairedBarcodes()) {
147 numFPrimers = oligos.getPairedPrimers().size();
148 map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
149 for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
150 primers[(it->second).forward] = it->first;
151 revPrimer.push_back((it->second).reverse);
154 numFPrimers = oligos.getPrimers().size();
155 primers = oligos.getPrimers();
156 revPrimer = oligos.getReversePrimers();
158 numRPrimers = oligos.getReversePrimers().size();
160 TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer);
162 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
164 if (pDataArray->m->control_pressed) { break; }
166 Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
168 if (pDataArray->fileAligned) { //assume aligned until proven otherwise
169 lengths.insert(currSeq.getAligned().length());
170 if (lengths.size() > 1) { pDataArray->fileAligned = false; }
173 string trashCode = "";
174 string locationsString = "";
177 if (currSeq.getName() != "") {
180 if (pDataArray->oligosfile != "") {
181 map<int, int> mapAligned;
182 //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
183 ///////////////////////////////////////////////////////////////
184 bool aligned = false;
185 string seq = currSeq.getAligned();
187 for (int k = 0; k < seq.length(); k++) {
188 if (!isalpha(seq[k])) { aligned = true; }
189 else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
190 } //ie. the 3rd base may be at spot 10 in the alignment
191 //later when we trim we want to trim from spot 10.
192 ///////////////////////////////////////////////////////////////
195 if (numFPrimers != 0) {
196 int primerStart = 0; int primerEnd = 0;
197 bool good = trim.findForward(currSeq, primerStart, primerEnd);
199 if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
203 if (!pDataArray->keepprimer) {
204 if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd-1]+1); }
206 currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
207 if (pDataArray->fileAligned) {
208 thisPStart = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
209 locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
214 if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
216 currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
217 if (pDataArray->fileAligned) {
218 thisPStart = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
219 locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
223 ///////////////////////////////////////////////////////////////
225 string seq = currSeq.getAligned();
227 for (int k = 0; k < seq.length(); k++) {
228 if (!isalpha(seq[k])) { ; }
229 else { mapAligned[countBases] = k; countBases++; }
231 ///////////////////////////////////////////////////////////////
233 if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
234 else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
239 //process reverse primers
240 if (numRPrimers != 0) {
241 int primerStart = 0; int primerEnd = 0;
242 bool good = trim.findReverse(currSeq, primerStart, primerEnd);
244 if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
248 if (!pDataArray->keepprimer) {
249 if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
251 currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
252 if (pDataArray->fileAligned) {
253 thisPEnd = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
254 locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
260 if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
262 currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
263 if (pDataArray->fileAligned) {
264 thisPEnd = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
265 locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
271 if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); }
272 else { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd)); }
276 }else if (pDataArray->ecolifile != "") {
277 //make sure the seqs are aligned
278 if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
279 else if (currSeq.getAligned().length() != pDataArray->length) {
280 pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break;
282 if (pDataArray->keepdots) {
283 currSeq.filterToPos(pDataArray->start);
284 currSeq.filterFromPos(pDataArray->end);
286 string seqString = currSeq.getAligned().substr(0, pDataArray->end);
287 seqString = seqString.substr(pDataArray->start);
288 currSeq.setAligned(seqString);
291 }else{ //using start and end to trim
292 //make sure the seqs are aligned
293 if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
295 if (pDataArray->end != -1) {
296 if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
298 if (pDataArray->keepdots) { currSeq.filterFromPos(pDataArray->end); }
300 string seqString = currSeq.getAligned().substr(0, pDataArray->end);
301 currSeq.setAligned(seqString);
305 if (pDataArray->start != -1) {
306 if (pDataArray->keepdots) { currSeq.filterToPos(pDataArray->start); }
308 string seqString = currSeq.getAligned().substr(pDataArray->start);
309 currSeq.setAligned(seqString);
317 currSeq.printSequence(goodFile);
318 if (locationsString != "") { locationsFile << locationsString; }
319 if (thisPStart != -1) { locations.insert(thisPStart); }
322 pDataArray->badSeqNames.insert(currSeq.getName());
323 currSeq.setName(currSeq.getName() + '|' + trashCode);
324 currSeq.printSequence(badFile);
329 if((i+1) % 100 == 0){ pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1)+"\n"); }
332 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen("Thread Processing sequence: " + toString(pDataArray->count)+"\n"); }
337 locationsFile.close();
339 if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); }
341 if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
342 if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
343 if (numFPrimers != 0) { set<int>::iterator it = locations.begin(); pDataArray->pstart = *it; }
348 catch(exception& e) {
349 pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
356 /**************************************************************************************************/