]> git.donarmstrong.com Git - mothur.git/blob - pcrseqscommand.h
changed random forest output filename
[mothur.git] / pcrseqscommand.h
1 #ifndef Mothur_pcrseqscommand_h
2 #define Mothur_pcrseqscommand_h
3
4 //
5 //  pcrseqscommand.h
6 //  Mothur
7 //
8 //  Created by Sarah Westcott on 3/14/12.
9 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
10 //
11
12
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "trimoligos.h"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18 #include "counttable.h"
19
20 class PcrSeqsCommand : public Command {
21 public:
22         PcrSeqsCommand(string);
23         PcrSeqsCommand();
24         ~PcrSeqsCommand(){}
25         
26         vector<string> setParameters();
27         string getCommandName()                 { return "pcr.seqs";    }
28         string getCommandCategory()             { return "Sequence Processing";         }
29         
30         string getHelpString(); 
31     string getOutputPattern(string);    
32         string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
33         string getDescription()         { return "pcr.seqs"; }
34     
35         int execute(); 
36         void help() { m->mothurOut(getHelpString()); }  
37         
38 private:
39     
40     struct linePair {
41         unsigned long long start;
42         unsigned long long end;
43         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         linePair() {}
45     };
46     
47     vector<linePair> lines;
48         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
49     bool abort, keepprimer, keepdots, fileAligned;
50         string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
51         int start, end, processors, length, pdiffs;
52         
53     vector<string> revPrimer, outputNames;
54         map<string, int> primers;
55     
56     int writeAccnos(set<string>);
57     int readName(set<string>&);
58     int readGroup(set<string>);
59     int readTax(set<string>);
60     int readCount(set<string>);
61     bool readOligos();
62     bool readEcoli();
63         int driverPcr(string, string, string, string, set<string>&, linePair, int&, int&, bool&);
64         int createProcesses(string, string, string, set<string>&);
65     bool isAligned(string, map<int, int>&);
66     string reverseOligo(string);
67     int adjustDots(string, string, int, int);
68     
69 };
70
71 /**************************************************************************************************/
72 //custom data structure for threads to use.
73 // This is passed by void pointer so it can be any data type
74 // that can be passed using a single void pointer (LPVOID).
75 struct pcrData {
76         string filename; 
77     string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName;
78         unsigned long long fstart;
79         unsigned long long fend;
80         int count, start, end, length, pdiffs, pstart, pend;
81         MothurOut* m;
82         map<string, int> primers;
83     vector<string> revPrimer;
84     set<string> badSeqNames;
85     bool keepprimer, keepdots, fileAligned, adjustNeeded;
86         
87         
88         pcrData(){}
89         pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
90                 filename = f;
91         goodFasta = gf;
92         badFasta = bfn;
93                 m = mout;
94         oligosfile = ol;
95         ecolifile = ec;
96         primers = pr;
97         revPrimer = rpr;
98         nomatch = nm;
99         keepprimer = kp;
100         keepdots = kd;
101                 start = st;
102                 end = en;
103         length = l;
104                 fstart = fst;
105         fend = fen;
106         pdiffs = pd;
107         locationsName = loc;
108                 count = 0;
109         fileAligned = true;
110         adjustNeeded = false;
111         pstart = -1;
112         pend = -1;
113         }
114 };
115 /**************************************************************************************************/
116 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
117 #else
118 static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ 
119         pcrData* pDataArray;
120         pDataArray = (pcrData*)lpParam;
121         
122         try {
123         ofstream goodFile;
124                 pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
125         
126         ofstream badFile;
127                 pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
128         
129         ofstream locationsFile;
130                 pDataArray->m->openOutputFile(pDataArray->locationsName, locationsFile);
131                 
132                 ifstream inFASTA;
133                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
134         
135                 //print header if you are process 0
136                 if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
137                         inFASTA.seekg(0);
138                 }else { //this accounts for the difference in line endings. 
139                         inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA); 
140                 }
141         
142         set<int> lengths;
143         //pdiffs, bdiffs, primers, barcodes, revPrimers
144         map<string, int> faked;
145         vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
146         locations.resize(2);
147         TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
148                 
149                 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
150             pDataArray->count++;
151                         if (pDataArray->m->control_pressed) {  break; }
152                         
153                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
154             
155             if (pDataArray->fileAligned) { //assume aligned until proven otherwise
156                 lengths.insert(currSeq.getAligned().length());
157                 if (lengths.size() > 1) { pDataArray->fileAligned = false; }
158             }
159             
160             string trashCode = "";
161             string locationsString = "";
162             int thisPStart = -1;
163             int thisPEnd = -1;
164                         if (currSeq.getName() != "") {
165                 
166                 bool goodSeq = true;
167                 if (pDataArray->oligosfile != "") {
168                     map<int, int> mapAligned;
169                     //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
170                     ///////////////////////////////////////////////////////////////
171                     bool aligned = false;
172                     string seq = currSeq.getAligned(); 
173                     int countBases = 0;
174                     for (int k = 0; k < seq.length(); k++) {
175                         if (!isalpha(seq[k])) { aligned = true; }
176                         else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
177                     }                                                   //ie. the 3rd base may be at spot 10 in the alignment
178                                                                         //later when we trim we want to trim from spot 10.
179                     ///////////////////////////////////////////////////////////////
180                     
181                     //process primers
182                     if (pDataArray->primers.size() != 0) {
183                         int primerStart = 0; int primerEnd = 0;
184                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
185                         
186                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
187                         else{
188                             //are you aligned
189                             if (aligned) { 
190                                 if (!pDataArray->keepprimer)    {  
191                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd-1]+1);   }
192                                     else            {
193                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
194                                         if (pDataArray->fileAligned) {
195                                             thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
196                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
197                                         }
198 }
199                                 } 
200                                 else                {  
201                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
202                                     else            {
203                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
204                                         if (pDataArray->fileAligned) {
205                                             thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
206                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
207                                         }
208                                     }
209                                 }
210                                 ///////////////////////////////////////////////////////////////
211                                 mapAligned.clear();
212                                 string seq = currSeq.getAligned(); 
213                                 int countBases = 0;
214                                 for (int k = 0; k < seq.length(); k++) {
215                                     if (!isalpha(seq[k])) { ; }
216                                     else { mapAligned[countBases] = k; countBases++; } 
217                                 }                                                   
218                                 ///////////////////////////////////////////////////////////////
219                             }else { 
220                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
221                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
222                             }
223                         }
224                     }
225                     
226                     //process reverse primers
227                     if (pDataArray->revPrimer.size() != 0) {
228                         int primerStart = 0; int primerEnd = 0;
229                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
230                          
231                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
232                         else{ 
233                             //are you aligned
234                             if (aligned) { 
235                                 if (!pDataArray->keepprimer)    {  
236                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
237                                     else            {
238                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
239                                         if (pDataArray->fileAligned) {
240                                             thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
241                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
242                                         }
243
244                                     }
245                                 } 
246                                 else                {  
247                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
248                                     else            {
249                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
250                                         if (pDataArray->fileAligned) {
251                                             thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
252                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
253                                         }
254
255                                     }
256                                 }                             }
257                             else { 
258                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
259                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
260                             }
261                         }
262                     }
263                 }else if (pDataArray->ecolifile != "") {
264                     //make sure the seqs are aligned
265                     if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
266                     else if (currSeq.getAligned().length() != pDataArray->length) {
267                         pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
268                     }else {
269                         if (pDataArray->keepdots)   { 
270                             currSeq.filterToPos(pDataArray->start); 
271                             currSeq.filterFromPos(pDataArray->end);
272                         }else {
273                             string seqString = currSeq.getAligned().substr(0, pDataArray->end);
274                             seqString = seqString.substr(pDataArray->start);
275                             currSeq.setAligned(seqString); 
276                         }
277                     }
278                 }else{ //using start and end to trim
279                     //make sure the seqs are aligned
280                     if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
281                     else {
282                         if (pDataArray->end != -1) {
283                             if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
284                             else {
285                                 if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
286                                 else {
287                                     string seqString = currSeq.getAligned().substr(0, pDataArray->end);
288                                     currSeq.setAligned(seqString); 
289                                 }
290                             }
291                         }
292                         if (pDataArray->start != -1) { 
293                             if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
294                             else {
295                                 string seqString = currSeq.getAligned().substr(pDataArray->start);
296                                 currSeq.setAligned(seqString); 
297                             }
298                         }
299                         
300                     }
301                 }
302                 
303                                 if(goodSeq == 1)    {
304                     currSeq.printSequence(goodFile);
305                     if (locationsString != "") { locationsFile << locationsString; }
306                     if (thisPStart != -1)   { locations[0].insert(thisPStart);  }
307                     if (thisPEnd != -1)     { locations[1].insert(thisPEnd);    }
308                 }
309                                 else {  
310                     pDataArray->badSeqNames.insert(currSeq.getName()); 
311                     currSeq.setName(currSeq.getName() + '|' + trashCode);
312                     currSeq.printSequence(badFile); 
313                 }
314                         }
315                                                 
316                         //report progress
317                         if((i+1) % 100 == 0){   pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1)+"\n");     }
318                 }
319                 //report progress
320                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOutJustToScreen("Thread Processing sequence: " + toString(pDataArray->count)+"\n");                }
321                 
322                 goodFile.close();
323                 inFASTA.close();
324         badFile.close();
325         locationsFile.close();
326         
327         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); }
328         
329         if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
330             if ((locations[0].size() > 1) || (locations[1].size() > 1)) { pDataArray->adjustNeeded = true; }
331             if (pDataArray->primers.size() != 0)    {   set<int>::iterator it = locations[0].begin();  pDataArray->pstart = *it;  }
332             if (pDataArray->revPrimer.size() != 0)  {   set<int>::reverse_iterator it2 = locations[1].rbegin();  pDataArray->pend = *it2; }
333         }
334         
335         return 0;
336         }
337         catch(exception& e) {
338                 pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
339                 exit(1);
340         }
341
342
343 #endif
344
345 /**************************************************************************************************/
346
347
348
349 #endif