1 #ifndef CHIMERASLAYERCOMMAND_H
2 #define CHIMERASLAYERCOMMAND_H
5 * chimeraslayercommand.h
8 * Created by westcott on 3/31/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
16 #include "chimeraslayer.h"
17 #include "sequenceparser.h"
19 /***********************************************************/
21 class ChimeraSlayerCommand : public Command {
23 ChimeraSlayerCommand(string);
24 ChimeraSlayerCommand();
25 ~ChimeraSlayerCommand() {}
27 vector<string> setParameters();
28 string getCommandName() { return "chimera.slayer"; }
29 string getCommandCategory() { return "Sequence Processing"; }
30 string getHelpString();
31 string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res. \nhttp://www.mothur.org/wiki/Chimera.slayer"; }
32 string getDescription() { return "detect chimeric sequences"; }
35 void help() { m->mothurOut(getHelpString()); }
40 unsigned long long start;
41 unsigned long long end;
42 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
45 vector<int> processIDS; //processid
46 vector<linePair*> lines;
48 int driver(linePair*, string, string, string, string);
49 int createProcesses(string, string, string, string);
50 int divideInHalf(Sequence, string&, string&);
51 map<string, int> sortFastaFile(string, string);
52 map<string, int> sortFastaFile(vector<Sequence>&, map<string, string>&, string newFile);
53 string getNamesFile(string&);
54 int setupChimera(string, map<string, int>&);
55 int MPIExecute(string, string, string, string);
56 int deconvoluteResults(SequenceParser*, string, string, string);
57 map<string, int> priority;
61 int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
64 bool abort, realign, trim, trimera, save;
65 string fastafile, groupfile, templatefile, outputDir, search, namefile, blastlocation;
66 int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
70 vector<string> outputNames;
71 vector<string> fastaFileNames;
72 vector<string> nameFileNames;
73 vector<string> groupFileNames;
77 /***********************************************************/
79 //custom data structure for threads to use.
80 // This is passed by void pointer so it can be any data type
81 // that can be passed using a single void pointer (LPVOID).
92 unsigned long long start;
93 unsigned long long end;
94 int ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted;
97 map<string, int> priority;
103 slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
121 minSimilarity = minS;
137 /**************************************************************************************************/
138 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
140 static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){
141 slayerData* pDataArray;
142 pDataArray = (slayerData*)lpParam;
146 pDataArray->m->openOutputFile(pDataArray->outputFName, out);
149 pDataArray->m->openOutputFile(pDataArray->accnos, out2);
152 if (pDataArray->trim) { pDataArray->m->openOutputFile(pDataArray->fasta, out3); }
155 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
160 if (pDataArray->templatefile != "self") { //you want to run slayer with a reference template
161 chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);
163 chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->priority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);
166 //print header if you are process 0
167 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
168 chimera->printHeader(out);
170 }else { //this accounts for the difference in line endings.
171 inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA);
174 pDataArray->count = pDataArray->end;
176 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 0; }
178 if (chimera->getUnaligned()) {
179 pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine();
180 out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
184 int templateSeqsLength = chimera->getLength();
186 if (pDataArray->start == 0) { chimera->printHeader(out); }
189 for(int i = 0; i < pDataArray->end; i++){
191 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1; }
193 Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA);
194 string candidateAligned = candidateSeq->getAligned();
196 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
197 if (candidateSeq->getAligned().length() != templateSeqsLength) {
198 pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
201 chimera->getChimeras(candidateSeq);
203 if (pDataArray->m->control_pressed) { delete candidateSeq; delete chimera; return 1; }
205 //if you are not chimeric, then check each half
206 data_results wholeResults = chimera->getResults();
208 //determine if we need to split
209 bool isChimeric = false;
211 if (wholeResults.flag == "yes") {
212 string chimeraFlag = "no";
213 if( (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
215 (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
218 if (chimeraFlag == "yes") {
219 if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
223 if ((!isChimeric) && pDataArray->trimera) {
225 //split sequence in half by bases
226 string leftQuery, rightQuery;
227 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
228 //divideInHalf(tempSeq, leftQuery, rightQuery);
229 string queryUnAligned = tempSeq.getUnaligned();
230 int numBases = int(queryUnAligned.length() * 0.5);
232 string queryAligned = tempSeq.getAligned();
233 leftQuery = tempSeq.getAligned();
234 rightQuery = tempSeq.getAligned();
238 for (int i = 0; i < queryAligned.length(); i++) {
240 if (isalpha(queryAligned[i])) {
245 if (baseCount >= numBases) { leftSpot = i; break; } //first half
248 //blank out right side
249 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
251 //blank out left side
252 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
254 //run chimeraSlayer on each piece
255 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
256 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
259 chimera->getChimeras(left);
260 data_results leftResults = chimera->getResults();
262 chimera->getChimeras(right);
263 data_results rightResults = chimera->getResults();
265 //if either piece is chimeric then report
266 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
267 if (pDataArray->trim) { trimmed.printSequence(out3); }
269 delete left; delete right;
271 }else { //already chimeric
273 Sequence trimmed = chimera->print(out, out2);
274 if (pDataArray->trim) { trimmed.printSequence(out3); }
284 if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); }
287 if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); }
289 pDataArray->numNoParents = chimera->getNumNoParents();
292 if (pDataArray->trim) { out3.close(); }
299 catch(exception& e) {
300 pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerThreadFunction");
306 /**************************************************************************************************/