]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayercommand.h
paralellized chimera.uchime for windows for both by group and with a template.
[mothur.git] / chimeraslayercommand.h
1 #ifndef CHIMERASLAYERCOMMAND_H
2 #define CHIMERASLAYERCOMMAND_H
3
4 /*
5  *  chimeraslayercommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 3/31/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "chimera.h"
16 #include "chimeraslayer.h"
17 #include "sequenceparser.h"
18
19 /***********************************************************/
20
21 class ChimeraSlayerCommand : public Command {
22 public:
23         ChimeraSlayerCommand(string);
24         ChimeraSlayerCommand();
25         ~ChimeraSlayerCommand() {}
26         
27         vector<string> setParameters();
28         string getCommandName()                 { return "chimera.slayer";              }
29         string getCommandCategory()             { return "Sequence Processing"; }
30         string getHelpString(); 
31         string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res. \nhttp://www.mothur.org/wiki/Chimera.slayer"; }
32         string getDescription()         { return "detect chimeric sequences"; }
33         
34         int execute(); 
35         void help() { m->mothurOut(getHelpString()); }          
36         
37 private:
38
39         struct linePair {
40                 unsigned long long start;
41                 unsigned long long end;
42                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
43         };
44
45         vector<int> processIDS;   //processid
46         vector<linePair*> lines;
47         
48         int driver(linePair*, string, string, string, string);
49         int createProcesses(string, string, string, string);
50         int divideInHalf(Sequence, string&, string&);
51         map<string, int> sortFastaFile(string, string);
52         map<string, int> sortFastaFile(vector<Sequence>&, map<string, string>&, string newFile);
53         string getNamesFile(string&);
54         int setupChimera(string, map<string, int>&);
55         int MPIExecute(string, string, string, string);
56         int deconvoluteResults(SequenceParser*, string, string, string);
57         map<string, int> priority;
58
59                 
60         #ifdef USE_MPI
61         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
62         #endif
63
64         bool abort, realign, trim, trimera, save;
65         string fastafile, groupfile, templatefile, outputDir, search, namefile, blastlocation;
66         int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
67         float divR;
68         Chimera* chimera;
69         
70         vector<string> outputNames;
71         vector<string> fastaFileNames;
72         vector<string> nameFileNames;
73         vector<string> groupFileNames;
74         
75 };
76
77 /***********************************************************/
78
79 //custom data structure for threads to use.
80 // This is passed by void pointer so it can be any data type
81 // that can be passed using a single void pointer (LPVOID).
82 struct slayerData {
83         string outputFName; 
84         string fasta; 
85         string accnos;
86         string filename;
87         string templatefile;
88         string search;
89         string blastlocation;
90         bool trimera;
91         bool trim, realign;
92         unsigned long long start;
93         unsigned long long end;
94         int ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted;
95         MothurOut* m;
96         float divR;
97         map<string, int> priority;
98         int count;
99         int numNoParents;
100         int threadId;
101         
102         slayerData(){}
103         slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
104                 outputFName = o;
105                 fasta = fa;
106                 accnos = ac;
107                 filename = f;
108                 templatefile = te;
109                 search = se;
110                 blastlocation = bl;
111                 trimera = tri;
112                 trim = trm;
113                 realign = re;
114                 m = mout;
115                 start = st;
116                 end = en;
117                 ksize = ks;
118                 match = ma; 
119                 mismatch = mis;
120                 window = win;
121                 minSimilarity = minS;
122                 minCoverage = minC;
123                 minBS = miBS;
124                 minSNP = minSN;
125                 parents = par;
126                 iters = it;
127                 increment = inc;
128                 numwanted = numw;
129                 divR = div;
130                 priority = prior;
131                 threadId = tid;
132                 count = 0;
133                 numNoParents = 0;
134         }
135 };
136
137 /**************************************************************************************************/
138 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
139 #else
140 static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ 
141         slayerData* pDataArray;
142         pDataArray = (slayerData*)lpParam;
143         
144         try {
145                 ofstream out;
146                 pDataArray->m->openOutputFile(pDataArray->outputFName, out);
147                 
148                 ofstream out2;
149                 pDataArray->m->openOutputFile(pDataArray->accnos, out2);
150                 
151                 ofstream out3;
152                 if (pDataArray->trim) {  pDataArray->m->openOutputFile(pDataArray->fasta, out3); }
153                 
154                 ifstream inFASTA;
155                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
156                 
157                 
158                 
159                 Chimera* chimera;
160                 if (pDataArray->templatefile != "self") { //you want to run slayer with a reference template
161                         chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);     
162                 }else {
163                         chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->priority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);       
164                 }
165                 
166                 //print header if you are process 0
167                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
168                         chimera->printHeader(out); 
169                         inFASTA.seekg(0);
170                 }else { //this accounts for the difference in line endings. 
171                         inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
172                 }
173                 
174                 pDataArray->count = pDataArray->end;
175                 
176                 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera;  return 0;    }
177                 
178                 if (chimera->getUnaligned()) { 
179                         pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine(); 
180                         out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
181                         delete chimera;
182                         return 0; 
183                 }
184                 int templateSeqsLength = chimera->getLength();
185                 
186                 if (pDataArray->start == 0) { chimera->printHeader(out); }
187                 
188                 int count = 0;
189                 for(int i = 0; i < pDataArray->end; i++){
190                         
191                         if (pDataArray->m->control_pressed) {   out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1;   }
192                         
193                         Sequence* candidateSeq = new Sequence(inFASTA);  pDataArray->m->gobble(inFASTA);
194                         string candidateAligned = candidateSeq->getAligned();
195                         
196                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
197                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
198                                         pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
199                                 }else{
200                                         //find chimeras
201                                         chimera->getChimeras(candidateSeq);
202                                         
203                                         if (pDataArray->m->control_pressed) {   delete candidateSeq; delete chimera; return 1;  }
204                                         
205                                         //if you are not chimeric, then check each half
206                                         data_results wholeResults = chimera->getResults();
207                                         
208                                         //determine if we need to split
209                                         bool isChimeric = false;
210                                         
211                                         if (wholeResults.flag == "yes") {
212                                                 string chimeraFlag = "no";
213                                                 if(  (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
214                                                    ||
215                                                    (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
216                                                 
217                                                 
218                                                 if (chimeraFlag == "yes") {     
219                                                         if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
220                                                 }
221                                         }
222                                         
223                                         if ((!isChimeric) && pDataArray->trimera) {
224                                                 
225                                                 //split sequence in half by bases
226                                                 string leftQuery, rightQuery;
227                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
228                                                 //divideInHalf(tempSeq, leftQuery, rightQuery);
229                                                 string queryUnAligned = tempSeq.getUnaligned();
230                                                 int numBases = int(queryUnAligned.length() * 0.5);
231                                                 
232                                                 string queryAligned = tempSeq.getAligned();
233                                                 leftQuery = tempSeq.getAligned();
234                                                 rightQuery = tempSeq.getAligned();
235                                                 
236                                                 int baseCount = 0;
237                                                 int leftSpot = 0;
238                                                 for (int i = 0; i < queryAligned.length(); i++) {
239                                                         //if you are a base
240                                                         if (isalpha(queryAligned[i])) {         
241                                                                 baseCount++; 
242                                                         }
243                                                         
244                                                         //if you have half
245                                                         if (baseCount >= numBases) {  leftSpot = i; break; } //first half
246                                                 }
247                                                 
248                                                 //blank out right side
249                                                 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
250                                                 
251                                                 //blank out left side
252                                                 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
253                                                 
254                                                 //run chimeraSlayer on each piece
255                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
256                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
257                                                 
258                                                 //find chimeras
259                                                 chimera->getChimeras(left);
260                                                 data_results leftResults = chimera->getResults();
261                                                 
262                                                 chimera->getChimeras(right);
263                                                 data_results rightResults = chimera->getResults();
264                                                 
265                                                 //if either piece is chimeric then report
266                                                 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
267                                                 if (pDataArray->trim) { trimmed.printSequence(out3);  }
268                                                 
269                                                 delete left; delete right;
270                                                 
271                                         }else { //already chimeric
272                                                 //print results
273                                                 Sequence trimmed = chimera->print(out, out2);
274                                                 if (pDataArray->trim) { trimmed.printSequence(out3);  }
275                                         }
276                                         
277                                         
278                                 }
279                                 count++;
280                         }
281                         
282                         delete candidateSeq;
283                         //report progress
284                         if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
285                 }
286                 //report progress
287                 if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
288                 
289                 pDataArray->numNoParents = chimera->getNumNoParents();
290                 out.close();
291                 out2.close();
292                 if (pDataArray->trim) { out3.close(); }
293                 inFASTA.close();
294                 delete chimera;
295                 
296                 return 0;
297                 
298         }
299         catch(exception& e) {
300                 pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerThreadFunction");
301                 exit(1);
302         }
303
304 #endif
305
306 /**************************************************************************************************/
307
308
309 #endif
310
311