]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayercommand.h
working on windows paralellization, added trimOligos class to be used by trim.flows...
[mothur.git] / chimeraslayercommand.h
1 #ifndef CHIMERASLAYERCOMMAND_H
2 #define CHIMERASLAYERCOMMAND_H
3
4 /*
5  *  chimeraslayercommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 3/31/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "chimera.h"
16 #include "chimeraslayer.h"
17
18 /***********************************************************/
19
20 class ChimeraSlayerCommand : public Command {
21 public:
22         ChimeraSlayerCommand(string);
23         ChimeraSlayerCommand();
24         ~ChimeraSlayerCommand() {}
25         
26         vector<string> setParameters();
27         string getCommandName()                 { return "chimera.slayer";              }
28         string getCommandCategory()             { return "Sequence Processing"; }
29         string getHelpString(); 
30         string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res. \nhttp://www.mothur.org/wiki/Chimera.slayer"; }
31         string getDescription()         { return "detect chimeric sequences"; }
32         
33         int execute(); 
34         void help() { m->mothurOut(getHelpString()); }          
35         
36 private:
37
38         struct linePair {
39                 unsigned long long start;
40                 unsigned long long end;
41                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
42         };
43
44         vector<int> processIDS;   //processid
45         vector<linePair*> lines;
46         map<string, int> priority;
47         
48         int driver(linePair*, string, string, string, string);
49         int createProcesses(string, string, string, string);
50         int divideInHalf(Sequence, string&, string&);
51         map<string, int> sortFastaFile(string, string);
52                 
53         #ifdef USE_MPI
54         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
55         #endif
56
57         bool abort, realign, trim, trimera, save;
58         string fastafile, templatefile, outputDir, search, namefile, blastlocation;
59         int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
60         float divR;
61         Chimera* chimera;
62         
63         vector<string> outputNames;
64         vector<string> fastaFileNames;
65         vector<string> nameFileNames;
66         
67 };
68
69 /***********************************************************/
70
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
74 typedef struct slayerData {
75         string outputFName; 
76         string fasta; 
77         string accnos;
78         string filename;
79         string templatefile;
80         string search;
81         string blastlocation;
82         bool trimera;
83         bool trim, realign;
84         unsigned long long start;
85         unsigned long long end;
86         int ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted;
87         MothurOut* m;
88         float divR;
89         map<string, int> priority;
90         int count;
91         int numNoParents;
92         int threadId;
93         
94         slayerData(){}
95         slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
96                 outputFName = o;
97                 fasta = fa;
98                 accnos = ac;
99                 filename = f;
100                 templatefile = te;
101                 search = se;
102                 blastlocation = bl;
103                 trimera = tri;
104                 trim = trm;
105                 realign = re;
106                 m = mout;
107                 start = st;
108                 end = en;
109                 ksize = ks;
110                 match = ma; 
111                 mismatch = mis;
112                 window = win;
113                 minSimilarity = minS;
114                 minCoverage = minC;
115                 minBS = miBS;
116                 minSNP = minSN;
117                 parents = par;
118                 iters = it;
119                 increment = inc;
120                 numwanted = numw;
121                 divR = div;
122                 priority = prior;
123                 threadId = tid;
124                 count = 0;
125                 numNoParents = 0;
126         }
127 };
128
129 /**************************************************************************************************/
130 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
131 #else
132 static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ 
133         slayerData* pDataArray;
134         pDataArray = (slayerData*)lpParam;
135         
136         try {
137                 ofstream out;
138                 pDataArray->m->openOutputFile(pDataArray->outputFName, out);
139                 
140                 ofstream out2;
141                 pDataArray->m->openOutputFile(pDataArray->accnos, out2);
142                 
143                 ofstream out3;
144                 if (pDataArray->trim) {  pDataArray->m->openOutputFile(pDataArray->fasta, out3); }
145                 
146                 ifstream inFASTA;
147                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
148                 
149                 
150                 
151                 Chimera* chimera;
152                 if (pDataArray->templatefile != "self") { //you want to run slayer with a reference template
153                         chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);     
154                 }else {
155                         chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->priority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);       
156                 }
157                 
158                 //print header if you are process 0
159                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
160                         chimera->printHeader(out); 
161                         inFASTA.seekg(0);
162                 }else { //this accounts for the difference in line endings. 
163                         inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
164                 }
165                 
166                 pDataArray->count = pDataArray->end;
167                 
168                 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera;  return 0;    }
169                 
170                 if (chimera->getUnaligned()) { 
171                         pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine(); 
172                         out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
173                         delete chimera;
174                         return 0; 
175                 }
176                 int templateSeqsLength = chimera->getLength();
177                 
178                 if (pDataArray->start == 0) { chimera->printHeader(out); }
179                 
180                 int count = 0;
181                 for(int i = 0; i < pDataArray->end; i++){
182                         
183                         if (pDataArray->m->control_pressed) {   out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1;   }
184                         
185                         Sequence* candidateSeq = new Sequence(inFASTA);  pDataArray->m->gobble(inFASTA);
186                         string candidateAligned = candidateSeq->getAligned();
187                         
188                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
189                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
190                                         pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
191                                 }else{
192                                         //find chimeras
193                                         chimera->getChimeras(candidateSeq);
194                                         
195                                         if (pDataArray->m->control_pressed) {   delete candidateSeq; delete chimera; return 1;  }
196                                         
197                                         //if you are not chimeric, then check each half
198                                         data_results wholeResults = chimera->getResults();
199                                         
200                                         //determine if we need to split
201                                         bool isChimeric = false;
202                                         
203                                         if (wholeResults.flag == "yes") {
204                                                 string chimeraFlag = "no";
205                                                 if(  (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
206                                                    ||
207                                                    (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
208                                                 
209                                                 
210                                                 if (chimeraFlag == "yes") {     
211                                                         if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
212                                                 }
213                                         }
214                                         
215                                         if ((!isChimeric) && pDataArray->trimera) {
216                                                 
217                                                 //split sequence in half by bases
218                                                 string leftQuery, rightQuery;
219                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
220                                                 //divideInHalf(tempSeq, leftQuery, rightQuery);
221                                                 string queryUnAligned = tempSeq.getUnaligned();
222                                                 int numBases = int(queryUnAligned.length() * 0.5);
223                                                 
224                                                 string queryAligned = tempSeq.getAligned();
225                                                 leftQuery = tempSeq.getAligned();
226                                                 rightQuery = tempSeq.getAligned();
227                                                 
228                                                 int baseCount = 0;
229                                                 int leftSpot = 0;
230                                                 for (int i = 0; i < queryAligned.length(); i++) {
231                                                         //if you are a base
232                                                         if (isalpha(queryAligned[i])) {         
233                                                                 baseCount++; 
234                                                         }
235                                                         
236                                                         //if you have half
237                                                         if (baseCount >= numBases) {  leftSpot = i; break; } //first half
238                                                 }
239                                                 
240                                                 //blank out right side
241                                                 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
242                                                 
243                                                 //blank out left side
244                                                 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
245                                                 
246                                                 //run chimeraSlayer on each piece
247                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
248                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
249                                                 
250                                                 //find chimeras
251                                                 chimera->getChimeras(left);
252                                                 data_results leftResults = chimera->getResults();
253                                                 
254                                                 chimera->getChimeras(right);
255                                                 data_results rightResults = chimera->getResults();
256                                                 
257                                                 //if either piece is chimeric then report
258                                                 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
259                                                 if (pDataArray->trim) { trimmed.printSequence(out3);  }
260                                                 
261                                                 delete left; delete right;
262                                                 
263                                         }else { //already chimeric
264                                                 //print results
265                                                 Sequence trimmed = chimera->print(out, out2);
266                                                 if (pDataArray->trim) { trimmed.printSequence(out3);  }
267                                         }
268                                         
269                                         
270                                 }
271                                 count++;
272                         }
273                         
274                         delete candidateSeq;
275                         //report progress
276                         if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
277                 }
278                 //report progress
279                 if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
280                 
281                 pDataArray->numNoParents = chimera->getNumNoParents();
282                 out.close();
283                 out2.close();
284                 if (pDataArray->trim) { out3.close(); }
285                 inFASTA.close();
286                 delete chimera;
287                 
288                 return 0;
289                 
290         }
291         catch(exception& e) {
292                 pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerThreadFunction");
293                 exit(1);
294         }
295
296 #endif
297
298 /**************************************************************************************************/
299
300
301 #endif
302
303