]> git.donarmstrong.com Git - mothur.git/blob - pcrseqscommand.h
update .gitignore
[mothur.git] / pcrseqscommand.h
1 #ifndef Mothur_pcrseqscommand_h
2 #define Mothur_pcrseqscommand_h
3
4 //
5 //  pcrseqscommand.h
6 //  Mothur
7 //
8 //  Created by Sarah Westcott on 3/14/12.
9 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
10 //
11
12
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "trimoligos.h"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18 #include "counttable.h"
19 #include "oligos.h"
20
21 class PcrSeqsCommand : public Command {
22 public:
23         PcrSeqsCommand(string);
24         PcrSeqsCommand();
25         ~PcrSeqsCommand(){}
26         
27         vector<string> setParameters();
28         string getCommandName()                 { return "pcr.seqs";    }
29         string getCommandCategory()             { return "Sequence Processing";         }
30         
31         string getHelpString(); 
32     string getOutputPattern(string);    
33         string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
34         string getDescription()         { return "pcr.seqs"; }
35     
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }  
38         
39 private:
40     
41     struct linePair {
42         unsigned long long start;
43         unsigned long long end;
44         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
45         linePair() {}
46     };
47     
48     vector<linePair> lines;
49     bool abort, keepprimer, keepdots, fileAligned, pairedOligos;
50         string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
51         int start, end, processors, length, pdiffs, numFPrimers, numRPrimers;
52     Oligos oligos;
53         
54     vector<string> outputNames;
55     
56     int writeAccnos(set<string>);
57     int readName(set<string>&);
58     int readGroup(set<string>);
59     int readTax(set<string>);
60     int readCount(set<string>);
61     int readOligos();
62     bool readEcoli();
63         int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
64         int createProcesses(string, string, string, set<string>&);
65     bool isAligned(string, map<int, int>&);
66     int adjustDots(string, string, int, int);
67     
68 };
69
70 /**************************************************************************************************/
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
74 struct pcrData {
75         string filename; 
76     string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName;
77         unsigned long long fstart;
78         unsigned long long fend;
79         int count, start, end, length, pdiffs, pstart, pend;
80         MothurOut* m;
81     set<string> badSeqNames;
82     bool keepprimer, keepdots, fileAligned, adjustNeeded;
83         
84         pcrData(){}
85         pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
86                 filename = f;
87         goodFasta = gf;
88         badFasta = bfn;
89                 m = mout;
90         oligosfile = ol;
91         ecolifile = ec;
92         nomatch = nm;
93         keepprimer = kp;
94         keepdots = kd;
95         end = en;
96                 start = st;
97         length = l;
98                 fstart = fst;
99         fend = fen;
100         pdiffs = pd;
101         locationsName = loc;
102                 count = 0;
103         fileAligned = true;
104         adjustNeeded = false;
105         pstart = -1;
106         pend = -1;
107         }
108 };
109 /**************************************************************************************************/
110 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
111 #else
112 static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ 
113         pcrData* pDataArray;
114         pDataArray = (pcrData*)lpParam;
115         
116         try {
117         ofstream goodFile;
118                 pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
119         
120         ofstream badFile;
121                 pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
122         
123         ofstream locationsFile;
124                 pDataArray->m->openOutputFile(pDataArray->locationsName, locationsFile);
125                 
126                 ifstream inFASTA;
127                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
128         
129                 //print header if you are process 0
130                 if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
131                         inFASTA.seekg(0);
132                 }else { //this accounts for the difference in line endings. 
133                         inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA); 
134                 }
135         
136         set<int> lengths;
137         //pdiffs, bdiffs, primers, barcodes, revPrimers
138         map<string, int> faked;
139         set<int> locations; //locations = beginning locations
140         
141         Oligos oligos(pDataArray->oligosfile);
142         int numFPrimers, numRPrimers;
143         map<string, int> primers;
144         map<string, int> barcodes; //not used
145         vector<string> revPrimer;
146         if (oligos.hasPairedBarcodes()) {
147             numFPrimers = oligos.getPairedPrimers().size();
148             map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
149             for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
150                 primers[(it->second).forward] = it->first;
151                 revPrimer.push_back((it->second).reverse);
152             }
153         }else {
154             numFPrimers = oligos.getPrimers().size();
155             primers = oligos.getPrimers();
156             revPrimer = oligos.getReversePrimers();
157         }
158         numRPrimers = oligos.getReversePrimers().size();
159         
160         TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer);
161                 
162                 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
163             pDataArray->count++;
164                         if (pDataArray->m->control_pressed) {  break; }
165                         
166                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
167             
168             if (pDataArray->fileAligned) { //assume aligned until proven otherwise
169                 lengths.insert(currSeq.getAligned().length());
170                 if (lengths.size() > 1) { pDataArray->fileAligned = false; }
171             }
172             
173             string trashCode = "";
174             string locationsString = "";
175             int thisPStart = -1;
176             int thisPEnd = -1;
177                         if (currSeq.getName() != "") {
178                 
179                 bool goodSeq = true;
180                 if (pDataArray->oligosfile != "") {
181                     map<int, int> mapAligned;
182                     //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
183                     ///////////////////////////////////////////////////////////////
184                     bool aligned = false;
185                     string seq = currSeq.getAligned(); 
186                     int countBases = 0;
187                     for (int k = 0; k < seq.length(); k++) {
188                         if (!isalpha(seq[k])) { aligned = true; }
189                         else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
190                     }                                                   //ie. the 3rd base may be at spot 10 in the alignment
191                                                                         //later when we trim we want to trim from spot 10.
192                     ///////////////////////////////////////////////////////////////
193                     
194                     //process primers
195                     if (numFPrimers != 0) {
196                         int primerStart = 0; int primerEnd = 0;
197                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
198                         
199                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
200                         else{
201                             //are you aligned
202                             if (aligned) { 
203                                 if (!pDataArray->keepprimer)    {  
204                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd-1]+1);   }
205                                     else            {
206                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
207                                         if (pDataArray->fileAligned) {
208                                             thisPStart = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
209                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
210                                         }
211 }
212                                 } 
213                                 else                {  
214                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
215                                     else            {
216                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
217                                         if (pDataArray->fileAligned) {
218                                             thisPStart = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
219                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
220                                         }
221                                     }
222                                 }
223                                 ///////////////////////////////////////////////////////////////
224                                 mapAligned.clear();
225                                 string seq = currSeq.getAligned(); 
226                                 int countBases = 0;
227                                 for (int k = 0; k < seq.length(); k++) {
228                                     if (!isalpha(seq[k])) { ; }
229                                     else { mapAligned[countBases] = k; countBases++; } 
230                                 }                                                   
231                                 ///////////////////////////////////////////////////////////////
232                             }else { 
233                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
234                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
235                             }
236                         }
237                     }
238                     
239                     //process reverse primers
240                     if (numRPrimers != 0) {
241                         int primerStart = 0; int primerEnd = 0;
242                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
243                          
244                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
245                         else{ 
246                             //are you aligned
247                             if (aligned) { 
248                                 if (!pDataArray->keepprimer)    {  
249                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
250                                     else            {
251                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
252                                         if (pDataArray->fileAligned) {
253                                             thisPEnd = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
254                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
255                                         }
256
257                                     }
258                                 } 
259                                 else                {  
260                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
261                                     else            {
262                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
263                                         if (pDataArray->fileAligned) {
264                                             thisPEnd = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
265                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
266                                         }
267
268                                     }
269                                 }                             }
270                             else { 
271                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
272                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
273                             }
274                         }
275                     }
276                 }else if (pDataArray->ecolifile != "") {
277                     //make sure the seqs are aligned
278                     if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
279                     else if (currSeq.getAligned().length() != pDataArray->length) {
280                         pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
281                     }else {
282                         if (pDataArray->keepdots)   { 
283                             currSeq.filterToPos(pDataArray->start); 
284                             currSeq.filterFromPos(pDataArray->end);
285                         }else {
286                             string seqString = currSeq.getAligned().substr(0, pDataArray->end);
287                             seqString = seqString.substr(pDataArray->start);
288                             currSeq.setAligned(seqString); 
289                         }
290                     }
291                 }else{ //using start and end to trim
292                     //make sure the seqs are aligned
293                     if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
294                     else {
295                         if (pDataArray->end != -1) {
296                             if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
297                             else {
298                                 if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
299                                 else {
300                                     string seqString = currSeq.getAligned().substr(0, pDataArray->end);
301                                     currSeq.setAligned(seqString); 
302                                 }
303                             }
304                         }
305                         if (pDataArray->start != -1) { 
306                             if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
307                             else {
308                                 string seqString = currSeq.getAligned().substr(pDataArray->start);
309                                 currSeq.setAligned(seqString); 
310                             }
311                         }
312                         
313                     }
314                 }
315                 
316                                 if(goodSeq == 1)    {
317                     currSeq.printSequence(goodFile);
318                     if (locationsString != "") { locationsFile << locationsString; }
319                     if (thisPStart != -1)   { locations.insert(thisPStart);  }
320                 }
321                                 else {  
322                     pDataArray->badSeqNames.insert(currSeq.getName()); 
323                     currSeq.setName(currSeq.getName() + '|' + trashCode);
324                     currSeq.printSequence(badFile); 
325                 }
326                         }
327                                                 
328                         //report progress
329                         if((i+1) % 100 == 0){   pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1)+"\n");     }
330                 }
331                 //report progress
332                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOutJustToScreen("Thread Processing sequence: " + toString(pDataArray->count)+"\n");                }
333                 
334                 goodFile.close();
335                 inFASTA.close();
336         badFile.close();
337         locationsFile.close();
338         
339         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); }
340         
341         if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
342             if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
343             if (numFPrimers != 0)    {   set<int>::iterator it = locations.begin();  pDataArray->pstart = *it;  }
344         }
345         
346         return 0;
347         }
348         catch(exception& e) {
349                 pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
350                 exit(1);
351         }
352
353
354 #endif
355
356 /**************************************************************************************************/
357
358
359
360 #endif