]> git.donarmstrong.com Git - mothur.git/blob - pcrseqscommand.h
added mothurOutJustToScreen function and changed all counter outputs to use it.
[mothur.git] / pcrseqscommand.h
1 #ifndef Mothur_pcrseqscommand_h
2 #define Mothur_pcrseqscommand_h
3
4 //
5 //  pcrseqscommand.h
6 //  Mothur
7 //
8 //  Created by Sarah Westcott on 3/14/12.
9 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
10 //
11
12
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "trimoligos.h"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18 #include "counttable.h"
19
20 class PcrSeqsCommand : public Command {
21 public:
22         PcrSeqsCommand(string);
23         PcrSeqsCommand();
24         ~PcrSeqsCommand(){}
25         
26         vector<string> setParameters();
27         string getCommandName()                 { return "pcr.seqs";    }
28         string getCommandCategory()             { return "Sequence Processing";         }
29         
30         string getHelpString(); 
31     string getOutputPattern(string);    
32         string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
33         string getDescription()         { return "pcr.seqs"; }
34     
35         int execute(); 
36         void help() { m->mothurOut(getHelpString()); }  
37         
38 private:
39     
40     struct linePair {
41         unsigned long long start;
42         unsigned long long end;
43         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         linePair() {}
45     };
46     
47     vector<linePair> lines;
48         bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
49     bool abort, keepprimer, keepdots;
50         string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
51         int start, end, processors, length, pdiffs;
52         
53     vector<string> revPrimer, outputNames;
54         map<string, int> primers;
55     
56     int writeAccnos(set<string>);
57     int readName(set<string>&);
58     int readGroup(set<string>);
59     int readTax(set<string>);
60     int readCount(set<string>);
61     bool readOligos();
62     bool readEcoli();
63         int driverPcr(string, string, string, set<string>&, linePair);  
64         int createProcesses(string, string, string, set<string>&);
65     bool isAligned(string, map<int, int>&);
66     string reverseOligo(string);
67 };
68
69 /**************************************************************************************************/
70 //custom data structure for threads to use.
71 // This is passed by void pointer so it can be any data type
72 // that can be passed using a single void pointer (LPVOID).
73 struct pcrData {
74         string filename; 
75     string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
76         unsigned long long fstart;
77         unsigned long long fend;
78         int count, start, end, length, pdiffs;
79         MothurOut* m;
80         map<string, int> primers;
81     vector<string> revPrimer;
82     set<string> badSeqNames;
83     bool keepprimer, keepdots;
84         
85         
86         pcrData(){}
87         pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
88                 filename = f;
89         goodFasta = gf;
90         badFasta = bfn;
91                 m = mout;
92         oligosfile = ol;
93         ecolifile = ec;
94         primers = pr;
95         revPrimer = rpr;
96         nomatch = nm;
97         keepprimer = kp;
98         keepdots = kd;
99                 start = st;
100                 end = en;
101         length = l;
102                 fstart = fst;
103         fend = fen;
104         pdiffs = pd;
105                 count = 0;
106         }
107 };
108 /**************************************************************************************************/
109 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
110 #else
111 static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ 
112         pcrData* pDataArray;
113         pDataArray = (pcrData*)lpParam;
114         
115         try {
116         ofstream goodFile;
117                 pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
118         
119         ofstream badFile;
120                 pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
121                 
122                 ifstream inFASTA;
123                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
124         
125                 //print header if you are process 0
126                 if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
127                         inFASTA.seekg(0);
128                 }else { //this accounts for the difference in line endings. 
129                         inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA); 
130                 }
131         
132         set<int> lengths;
133         //pdiffs, bdiffs, primers, barcodes, revPrimers
134         map<string, int> faked;
135         TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
136                 
137                 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
138             pDataArray->count++;
139                         if (pDataArray->m->control_pressed) {  break; }
140                         
141                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
142           
143             string trashCode = "";
144                         if (currSeq.getName() != "") {
145                 
146                 bool goodSeq = true;
147                 if (pDataArray->oligosfile != "") {
148                     map<int, int> mapAligned;
149                     //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
150                     ///////////////////////////////////////////////////////////////
151                     bool aligned = false;
152                     string seq = currSeq.getAligned(); 
153                     int countBases = 0;
154                     for (int k = 0; k < seq.length(); k++) {
155                         if (!isalpha(seq[k])) { aligned = true; }
156                         else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
157                     }                                                   //ie. the 3rd base may be at spot 10 in the alignment
158                                                                         //later when we trim we want to trim from spot 10.
159                     ///////////////////////////////////////////////////////////////
160                     
161                     //process primers
162                     if (pDataArray->primers.size() != 0) {
163                         int primerStart = 0; int primerEnd = 0;
164                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
165                         
166                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
167                         else{
168                             //are you aligned
169                             if (aligned) { 
170                                 if (!pDataArray->keepprimer)    {  
171                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
172                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
173                                 } 
174                                 else                {  
175                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
176                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
177                                 }
178                                 ///////////////////////////////////////////////////////////////
179                                 mapAligned.clear();
180                                 string seq = currSeq.getAligned(); 
181                                 int countBases = 0;
182                                 for (int k = 0; k < seq.length(); k++) {
183                                     if (!isalpha(seq[k])) { ; }
184                                     else { mapAligned[countBases] = k; countBases++; } 
185                                 }                                                   
186                                 ///////////////////////////////////////////////////////////////
187                             }else { 
188                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
189                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
190                             }
191                         }
192                     }
193                     
194                     //process reverse primers
195                     if (pDataArray->revPrimer.size() != 0) {
196                         int primerStart = 0; int primerEnd = 0;
197                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
198                          
199                         if(!good){      if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
200                         else{ 
201                             //are you aligned
202                             if (aligned) { 
203                                 if (!pDataArray->keepprimer)    {  
204                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
205                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
206                                 } 
207                                 else                {  
208                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
209                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
210                                 }                             }
211                             else { 
212                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
213                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
214                             }
215                         }
216                     }
217                 }else if (pDataArray->ecolifile != "") {
218                     //make sure the seqs are aligned
219                     lengths.insert(currSeq.getAligned().length());
220                     if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
221                     else if (currSeq.getAligned().length() != pDataArray->length) {
222                         pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
223                     }else {
224                         if (pDataArray->keepdots)   { 
225                             currSeq.filterToPos(pDataArray->start); 
226                             currSeq.filterFromPos(pDataArray->end);
227                         }else {
228                             string seqString = currSeq.getAligned().substr(0, pDataArray->end);
229                             seqString = seqString.substr(pDataArray->start);
230                             currSeq.setAligned(seqString); 
231                         }
232                     }
233                 }else{ //using start and end to trim
234                     //make sure the seqs are aligned
235                     lengths.insert(currSeq.getAligned().length());
236                     if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
237                     else {
238                         if (pDataArray->end != -1) {
239                             if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
240                             else {
241                                 if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
242                                 else {
243                                     string seqString = currSeq.getAligned().substr(0, pDataArray->end);
244                                     currSeq.setAligned(seqString); 
245                                 }
246                             }
247                         }
248                         if (pDataArray->start != -1) { 
249                             if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
250                             else {
251                                 string seqString = currSeq.getAligned().substr(pDataArray->start);
252                                 currSeq.setAligned(seqString); 
253                             }
254                         }
255                         
256                     }
257                 }
258                 
259                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
260                                 else {  
261                     pDataArray->badSeqNames.insert(currSeq.getName()); 
262                     currSeq.setName(currSeq.getName() + '|' + trashCode);
263                     currSeq.printSequence(badFile); 
264                 }
265                         }
266                                                 
267                         //report progress
268                         if((i+1) % 100 == 0){   pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1)+"\n");     }
269                 }
270                 //report progress
271                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOutJustToScreen("Thread Processing sequence: " + toString(pDataArray->count)+"\n");                }
272                 
273                 goodFile.close();
274                 inFASTA.close();
275         badFile.close();
276         
277         return 0;
278                 
279         }
280         catch(exception& e) {
281                 pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
282                 exit(1);
283         }
284
285
286 #endif
287
288 /**************************************************************************************************/
289
290
291
292 #endif