1 #ifndef CHIMERASLAYERCOMMAND_H
2 #define CHIMERASLAYERCOMMAND_H
5 * chimeraslayercommand.h
8 * Created by westcott on 3/31/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
16 #include "chimeraslayer.h"
18 /***********************************************************/
20 class ChimeraSlayerCommand : public Command {
22 ChimeraSlayerCommand(string);
23 ChimeraSlayerCommand();
24 ~ChimeraSlayerCommand() {}
26 vector<string> setParameters();
27 string getCommandName() { return "chimera.slayer"; }
28 string getCommandCategory() { return "Sequence Processing"; }
29 string getHelpString();
30 string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res. \nhttp://www.mothur.org/wiki/Chimera.slayer"; }
31 string getDescription() { return "detect chimeric sequences"; }
34 void help() { m->mothurOut(getHelpString()); }
39 unsigned long long start;
40 unsigned long long end;
41 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44 vector<int> processIDS; //processid
45 vector<linePair*> lines;
46 map<string, int> priority;
48 int driver(linePair*, string, string, string, string);
49 int createProcesses(string, string, string, string);
50 int divideInHalf(Sequence, string&, string&);
51 map<string, int> sortFastaFile(string, string);
54 int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
57 bool abort, realign, trim, trimera, save;
58 string fastafile, templatefile, outputDir, search, namefile, blastlocation;
59 int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
63 vector<string> outputNames;
64 vector<string> fastaFileNames;
65 vector<string> nameFileNames;
69 /***********************************************************/
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
74 typedef struct slayerData {
84 unsigned long long start;
85 unsigned long long end;
86 int ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted;
89 map<string, int> priority;
95 slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
113 minSimilarity = minS;
129 /**************************************************************************************************/
130 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
132 static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){
133 slayerData* pDataArray;
134 pDataArray = (slayerData*)lpParam;
138 pDataArray->m->openOutputFile(pDataArray->outputFName, out);
141 pDataArray->m->openOutputFile(pDataArray->accnos, out2);
144 if (pDataArray->trim) { pDataArray->m->openOutputFile(pDataArray->fasta, out3); }
147 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
152 if (pDataArray->templatefile != "self") { //you want to run slayer with a reference template
153 chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);
155 chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->priority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);
158 //print header if you are process 0
159 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
160 chimera->printHeader(out);
162 }else { //this accounts for the difference in line endings.
163 inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA);
166 pDataArray->count = pDataArray->end;
168 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 0; }
170 if (chimera->getUnaligned()) {
171 pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine();
172 out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
176 int templateSeqsLength = chimera->getLength();
178 if (pDataArray->start == 0) { chimera->printHeader(out); }
181 for(int i = 0; i < pDataArray->end; i++){
183 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1; }
185 Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA);
186 string candidateAligned = candidateSeq->getAligned();
188 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
189 if (candidateSeq->getAligned().length() != templateSeqsLength) {
190 pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
193 chimera->getChimeras(candidateSeq);
195 if (pDataArray->m->control_pressed) { delete candidateSeq; delete chimera; return 1; }
197 //if you are not chimeric, then check each half
198 data_results wholeResults = chimera->getResults();
200 //determine if we need to split
201 bool isChimeric = false;
203 if (wholeResults.flag == "yes") {
204 string chimeraFlag = "no";
205 if( (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
207 (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
210 if (chimeraFlag == "yes") {
211 if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
215 if ((!isChimeric) && pDataArray->trimera) {
217 //split sequence in half by bases
218 string leftQuery, rightQuery;
219 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
220 //divideInHalf(tempSeq, leftQuery, rightQuery);
221 string queryUnAligned = tempSeq.getUnaligned();
222 int numBases = int(queryUnAligned.length() * 0.5);
224 string queryAligned = tempSeq.getAligned();
225 leftQuery = tempSeq.getAligned();
226 rightQuery = tempSeq.getAligned();
230 for (int i = 0; i < queryAligned.length(); i++) {
232 if (isalpha(queryAligned[i])) {
237 if (baseCount >= numBases) { leftSpot = i; break; } //first half
240 //blank out right side
241 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
243 //blank out left side
244 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
246 //run chimeraSlayer on each piece
247 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
248 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
251 chimera->getChimeras(left);
252 data_results leftResults = chimera->getResults();
254 chimera->getChimeras(right);
255 data_results rightResults = chimera->getResults();
257 //if either piece is chimeric then report
258 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
259 if (pDataArray->trim) { trimmed.printSequence(out3); }
261 delete left; delete right;
263 }else { //already chimeric
265 Sequence trimmed = chimera->print(out, out2);
266 if (pDataArray->trim) { trimmed.printSequence(out3); }
276 if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); }
279 if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); }
281 pDataArray->numNoParents = chimera->getNumNoParents();
284 if (pDataArray->trim) { out3.close(); }
291 catch(exception& e) {
292 pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerThreadFunction");
298 /**************************************************************************************************/