1 #ifndef Mothur_pcrseqscommand_h
2 #define Mothur_pcrseqscommand_h
8 // Created by Sarah Westcott on 3/14/12.
9 // Copyright (c) 2012 Schloss Lab. All rights reserved.
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "trimoligos.h"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18 #include "counttable.h"
20 class PcrSeqsCommand : public Command {
22 PcrSeqsCommand(string);
26 vector<string> setParameters();
27 string getCommandName() { return "pcr.seqs"; }
28 string getCommandCategory() { return "Sequence Processing"; }
30 string getHelpString();
31 string getOutputPattern(string);
32 string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
33 string getDescription() { return "pcr.seqs"; }
36 void help() { m->mothurOut(getHelpString()); }
41 unsigned long long start;
42 unsigned long long end;
43 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
47 vector<linePair> lines;
48 bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
49 bool abort, keepprimer, keepdots;
50 string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
51 int start, end, processors, length, pdiffs;
53 vector<string> revPrimer, outputNames;
54 map<string, int> primers;
56 int writeAccnos(set<string>);
57 int readName(set<string>&);
58 int readGroup(set<string>);
59 int readTax(set<string>);
60 int readCount(set<string>);
63 int driverPcr(string, string, string, set<string>&, linePair);
64 int createProcesses(string, string, string, set<string>&);
65 bool isAligned(string, map<int, int>&);
66 string reverseOligo(string);
69 /**************************************************************************************************/
70 //custom data structure for threads to use.
71 // This is passed by void pointer so it can be any data type
72 // that can be passed using a single void pointer (LPVOID).
75 string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
76 unsigned long long fstart;
77 unsigned long long fend;
78 int count, start, end, length, pdiffs;
80 map<string, int> primers;
81 vector<string> revPrimer;
82 set<string> badSeqNames;
83 bool keepprimer, keepdots;
87 pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
108 /**************************************************************************************************/
109 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
111 static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
113 pDataArray = (pcrData*)lpParam;
117 pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
120 pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
123 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
125 //print header if you are process 0
126 if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
128 }else { //this accounts for the difference in line endings.
129 inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA);
133 //pdiffs, bdiffs, primers, barcodes, revPrimers
134 map<string, int> faked;
135 TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
137 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
139 if (pDataArray->m->control_pressed) { break; }
141 Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
143 string trashCode = "";
144 if (currSeq.getName() != "") {
147 if (pDataArray->oligosfile != "") {
148 map<int, int> mapAligned;
149 //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
150 ///////////////////////////////////////////////////////////////
151 bool aligned = false;
152 string seq = currSeq.getAligned();
154 for (int k = 0; k < seq.length(); k++) {
155 if (!isalpha(seq[k])) { aligned = true; }
156 else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
157 } //ie. the 3rd base may be at spot 10 in the alignment
158 //later when we trim we want to trim from spot 10.
159 ///////////////////////////////////////////////////////////////
162 if (pDataArray->primers.size() != 0) {
163 int primerStart = 0; int primerEnd = 0;
164 bool good = trim.findForward(currSeq, primerStart, primerEnd);
166 if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
170 if (!pDataArray->keepprimer) {
171 if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
172 else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
175 if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
176 else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
178 ///////////////////////////////////////////////////////////////
180 string seq = currSeq.getAligned();
182 for (int k = 0; k < seq.length(); k++) {
183 if (!isalpha(seq[k])) { ; }
184 else { mapAligned[countBases] = k; countBases++; }
186 ///////////////////////////////////////////////////////////////
188 if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
189 else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
194 //process reverse primers
195 if (pDataArray->revPrimer.size() != 0) {
196 int primerStart = 0; int primerEnd = 0;
197 bool good = trim.findReverse(currSeq, primerStart, primerEnd);
199 if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
203 if (!pDataArray->keepprimer) {
204 if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
205 else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
208 if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
209 else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
212 if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); }
213 else { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd)); }
217 }else if (pDataArray->ecolifile != "") {
218 //make sure the seqs are aligned
219 lengths.insert(currSeq.getAligned().length());
220 if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
221 else if (currSeq.getAligned().length() != pDataArray->length) {
222 pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break;
224 if (pDataArray->keepdots) {
225 currSeq.filterToPos(pDataArray->start);
226 currSeq.filterFromPos(pDataArray->end);
228 string seqString = currSeq.getAligned().substr(0, pDataArray->end);
229 seqString = seqString.substr(pDataArray->start);
230 currSeq.setAligned(seqString);
233 }else{ //using start and end to trim
234 //make sure the seqs are aligned
235 lengths.insert(currSeq.getAligned().length());
236 if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
238 if (pDataArray->end != -1) {
239 if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
241 if (pDataArray->keepdots) { currSeq.filterFromPos(pDataArray->end); }
243 string seqString = currSeq.getAligned().substr(0, pDataArray->end);
244 currSeq.setAligned(seqString);
248 if (pDataArray->start != -1) {
249 if (pDataArray->keepdots) { currSeq.filterToPos(pDataArray->start); }
251 string seqString = currSeq.getAligned().substr(pDataArray->start);
252 currSeq.setAligned(seqString);
259 if(goodSeq == 1) { currSeq.printSequence(goodFile); }
261 pDataArray->badSeqNames.insert(currSeq.getName());
262 currSeq.setName(currSeq.getName() + '|' + trashCode);
263 currSeq.printSequence(badFile);
268 if((i+1) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine(); }
271 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
280 catch(exception& e) {
281 pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
288 /**************************************************************************************************/