]> git.donarmstrong.com Git - mothur.git/blob - pcrseqscommand.h
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / pcrseqscommand.h
1 #ifndef Mothur_pcrseqscommand_h
2 #define Mothur_pcrseqscommand_h
3
4 //
5 //  pcrseqscommand.h
6 //  Mothur
7 //
8 //  Created by Sarah Westcott on 3/14/12.
9 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
10 //
11
12
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "trimoligos.h"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18
19 class PcrSeqsCommand : public Command {
20 public:
21         PcrSeqsCommand(string);
22         PcrSeqsCommand();
23         ~PcrSeqsCommand(){}
24         
25         vector<string> setParameters();
26         string getCommandName()                 { return "pcr.seqs";    }
27         string getCommandCategory()             { return "Sequence Processing";         }
28         string getOutputFileNameTag(string, string);
29         string getHelpString(); 
30         string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
31         string getDescription()         { return "pcr.seqs"; }
32     
33         int execute(); 
34         void help() { m->mothurOut(getHelpString()); }  
35         
36 private:
37     
38     struct linePair {
39         unsigned long long start;
40         unsigned long long end;
41         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
42         linePair() {}
43     };
44     
45     vector<linePair> lines;
46         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
47     bool abort, keepprimer, keepdots;
48         string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch;
49         int start, end, processors, length;
50         
51     vector<string> revPrimer, outputNames;
52         vector<string> primers;
53     
54     int writeAccnos(set<string>);
55     int readName(set<string>&);
56     int readGroup(set<string>);
57     int readTax(set<string>);
58     bool readOligos();
59     bool readEcoli();
60         int driverPcr(string, string, string, set<string>&, linePair);  
61         int createProcesses(string, string, string, set<string>&);
62     bool findForward(Sequence&, int&, int&);
63     bool findReverse(Sequence&, int&, int&);
64     bool isAligned(string, map<int, int>&);
65     bool compareDNASeq(string, string);
66     string reverseOligo(string);
67 };
68
69 /**************************************************************************************************/
70 //custom data structure for threads to use.
71 // This is passed by void pointer so it can be any data type
72 // that can be passed using a single void pointer (LPVOID).
73 struct pcrData {
74         string filename; 
75     string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
76         unsigned long long fstart;
77         unsigned long long fend;
78         int count, start, end, length;
79         MothurOut* m;
80         vector<string> primers;
81     vector<string> revPrimer;
82     set<string> badSeqNames;
83     bool keepprimer, keepdots;
84         
85         
86         pcrData(){}
87         pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
88                 filename = f;
89         goodFasta = gf;
90         badFasta = bfn;
91                 m = mout;
92         oligosfile = ol;
93         ecolifile = ec;
94         primers = pr;
95         revPrimer = rpr;
96         nomatch = nm;
97         keepprimer = kp;
98         keepdots = kd;
99                 start = st;
100                 end = en;
101         length = l;
102                 fstart = fst;
103         fend = fen;
104                 count = 0;
105         }
106 };
107 /**************************************************************************************************/
108 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
109 #else
110 static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ 
111         pcrData* pDataArray;
112         pDataArray = (pcrData*)lpParam;
113         
114         try {
115         ofstream goodFile;
116                 pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
117         
118         ofstream badFile;
119                 pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
120                 
121                 ifstream inFASTA;
122                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
123         
124                 //print header if you are process 0
125                 if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
126                         inFASTA.seekg(0);
127                 }else { //this accounts for the difference in line endings. 
128                         inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA); 
129                 }
130         
131         set<int> lengths;
132                 pDataArray->count = pDataArray->fend;
133                 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
134             
135                         if (pDataArray->m->control_pressed) {  break; }
136                         
137                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
138           
139             string trashCode = "";
140                         if (currSeq.getName() != "") {
141                 
142                 bool goodSeq = true;
143                 if (pDataArray->oligosfile != "") {
144                     map<int, int> mapAligned;
145                     //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
146                     ///////////////////////////////////////////////////////////////
147                     bool aligned = false;
148                     string seq = currSeq.getAligned(); 
149                     int countBases = 0;
150                     for (int k = 0; k < seq.length(); k++) {
151                         if (!isalpha(seq[k])) { aligned = true; }
152                         else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
153                     }                                                   //ie. the 3rd base may be at spot 10 in the alignment
154                                                                         //later when we trim we want to trim from spot 10.
155                     ///////////////////////////////////////////////////////////////
156                     
157                     //process primers
158                     if (pDataArray->primers.size() != 0) {
159                         int primerStart = 0; int primerEnd = 0;
160                         //bool good = findForward(currSeq, primerStart, primerEnd);
161                         ///////////////////////////////////////////////////////////////
162                         bool good = false;
163                         string rawSequence = currSeq.getUnaligned();
164                         
165                         for(int j=0;j<pDataArray->primers.size();j++){
166                             string oligo = pDataArray->primers[j];
167                             
168                             if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
169                             
170                             if(rawSequence.length() < oligo.length()) {  break;  }
171                             
172                             //search for primer
173                             int olength = oligo.length();
174                             for (int l = 0; l < rawSequence.length()-olength; l++){
175                                 if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
176                                 string rawChunk = rawSequence.substr(l, olength);
177                                 //compareDNASeq(oligo, rawChunk)
178                                 ////////////////////////////////////////////////////////
179                                 bool success = 1;
180                                 for(int k=0;k<olength;k++){
181                                     
182                                     if(oligo[k] != rawChunk[k]){
183                                         if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')    {       success = 0;    }
184                                         else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                           {       success = 0;    }
185                                         else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                  {       success = 0;    }
186                                         else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                  {       success = 0;    }
187                                         else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                  {       success = 0;    }
188                                         else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                  {       success = 0;    }
189                                         else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                  {       success = 0;    }
190                                         else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                  {       success = 0;    }
191                                         else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))    {       success = 0;    }
192                                         else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))    {       success = 0;    }
193                                         else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))    {       success = 0;    }
194                                         else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))    {       success = 0;    }                       
195                                         
196                                         if(success == 0)        {       break;   }
197                                     }
198                                     else{
199                                         success = 1;
200                                     }
201                                 }
202                                 
203                                 ////////////////////////////////////////////////////////////////////
204                                 if(success) {
205                                     primerStart = j;
206                                     primerEnd = primerStart + olength;
207                                     good = true; break;
208                                 }
209                             }
210                             if (good) { break; }
211                         }       
212                         
213                         if (!good) { primerStart = 0; primerEnd = 0; }
214                         ///////////////////////////////////////////////////////////////
215                         
216                         
217                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
218                         else{
219                             //are you aligned
220                             if (aligned) { 
221                                 if (!pDataArray->keepprimer)    {  
222                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
223                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
224                                 } 
225                                 else                {  
226                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
227                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
228                                 }
229                             }else { 
230                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
231                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
232                             }
233                         }
234                     }
235                     
236                     //process reverse primers
237                     if (pDataArray->revPrimer.size() != 0) {
238                         int primerStart = 0; int primerEnd = 0;
239                         bool good = false;
240                         //findReverse(currSeq, primerStart, primerEnd);
241                          ///////////////////////////////////////////////////////////////
242                         string rawSequence = currSeq.getUnaligned();
243                         
244                         for(int j=0;j<pDataArray->revPrimer.size();j++){
245                             string oligo = pDataArray->revPrimer[j];
246                             if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
247                             if(rawSequence.length() < oligo.length()) {  break;  }
248                             
249                             //search for primer
250                             int olength = oligo.length();
251                             for (int l = rawSequence.length()-olength; l >= 0; l--){
252                                 
253                                 string rawChunk = rawSequence.substr(l, olength);
254                                 //compareDNASeq(oligo, rawChunk)
255                                 ////////////////////////////////////////////////////////
256                                 bool success = 1;
257                                 for(int k=0;k<olength;k++){
258                                     
259                                     if(oligo[k] != rawChunk[k]){
260                                         if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')    {       success = 0;    }
261                                         else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                           {       success = 0;    }
262                                         else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                  {       success = 0;    }
263                                         else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                  {       success = 0;    }
264                                         else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                  {       success = 0;    }
265                                         else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                  {       success = 0;    }
266                                         else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                  {       success = 0;    }
267                                         else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                  {       success = 0;    }
268                                         else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))    {       success = 0;    }
269                                         else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))    {       success = 0;    }
270                                         else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))    {       success = 0;    }
271                                         else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))    {       success = 0;    }                       
272                                         
273                                         if(success == 0)        {       break;   }
274                                     }
275                                     else{
276                                         success = 1;
277                                     }
278                                 }
279                                 
280                                 ////////////////////////////////////////////////////////////////////
281                                 if(success) {
282                                     primerStart = j;
283                                     primerEnd = primerStart + olength;
284                                     good = true; break;
285                                 }
286                             }
287                             if (good) { break; }
288                         }       
289                         
290                         if (!good) { primerStart = 0; primerEnd = 0; }
291
292                          ///////////////////////////////////////////////////////////////
293                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
294                         else{ 
295                             //are you aligned
296                             if (aligned) { 
297                                 if (!pDataArray->keepprimer)    {  
298                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
299                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
300                                 } 
301                                 else                {  
302                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
303                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
304                                 }                             }
305                             else { 
306                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
307                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
308                             }
309                         }
310                     }
311                 }else if (pDataArray->ecolifile != "") {
312                     //make sure the seqs are aligned
313                     lengths.insert(currSeq.getAligned().length());
314                     if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
315                     else if (currSeq.getAligned().length() != pDataArray->length) {
316                         pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
317                     }else {
318                         if (pDataArray->keepdots)   { 
319                             currSeq.filterToPos(pDataArray->start); 
320                             currSeq.filterFromPos(pDataArray->end);
321                         }else {
322                             string seqString = currSeq.getAligned().substr(0, pDataArray->end);
323                             seqString = seqString.substr(pDataArray->start);
324                             currSeq.setAligned(seqString); 
325                         }
326                     }
327                 }else{ //using start and end to trim
328                     //make sure the seqs are aligned
329                     lengths.insert(currSeq.getAligned().length());
330                     if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
331                     else {
332                         if (pDataArray->end != -1) {
333                             if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
334                             else {
335                                 if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
336                                 else {
337                                     string seqString = currSeq.getAligned().substr(0, pDataArray->end);
338                                     currSeq.setAligned(seqString); 
339                                 }
340                             }
341                         }
342                         if (pDataArray->start != -1) { 
343                             if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
344                             else {
345                                 string seqString = currSeq.getAligned().substr(pDataArray->start);
346                                 currSeq.setAligned(seqString); 
347                             }
348                         }
349                         
350                     }
351                 }
352                 
353                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
354                                 else {  
355                     pDataArray->badSeqNames.insert(currSeq.getName()); 
356                     currSeq.setName(currSeq.getName() + '|' + trashCode);
357                     currSeq.printSequence(badFile); 
358                 }
359                         }
360                                                 
361                         //report progress
362                         if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
363                 }
364                 //report progress
365                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();              }
366                 
367                 goodFile.close();
368                 inFASTA.close();
369         badFile.close();
370         
371         return 0;
372                 
373         }
374         catch(exception& e) {
375                 pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
376                 exit(1);
377         }
378
379
380 #endif
381
382 /**************************************************************************************************/
383
384
385
386 #endif