+// Constructor for the case where the reference template is read from its own file.
+// Every argument is copied into the matching member, the template sequences are
+// loaded from `temp` through readSeqs(), numNoParents is reset to 0 and doPrep()
+// is called last. A std::exception raised along the way is reported through
+// m->errorOut() and the process ends with exit(1).
+// Argument -> member mapping:
+//   file -> fastafile, temp -> templateFileName (also read into templateSeqs),
+//   trim -> trimChimera, mode -> searchMethod, k -> kmerSize, ms -> match,
+//   mms -> misMatch, win -> window, div -> divR, minsim -> minSim, mincov -> minCov,
+//   minbs -> minBS, minsnp -> minSNP, par -> parents, it -> iters, inc -> increment,
+//   numw -> numWanted, r -> realign, blas -> blastlocation, tid -> threadID
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
+                             int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
+    try {
+        // input files; the template is loaded immediately
+        fastafile = file;
+        templateFileName = temp;
+        templateSeqs = readSeqs(temp);
+
+        // search set-up
+        searchMethod = mode;
+        kmerSize = k;
+        blastlocation = blas;
+        threadID = tid;
+
+        // scoring and alignment settings
+        match = ms;
+        misMatch = mms;
+        realign = r;
+        trimChimera = trim;
+
+        // windowing and thresholds
+        window = win;
+        increment = inc;
+        divR = div;
+        minSim = minsim;
+        minCov = mincov;
+        minBS = minbs;
+        minSNP = minsnp;
+
+        // parent search limits
+        parents = par;
+        iters = it;
+        numWanted = numw;
+
+        // running counter starts at zero
+        numNoParents = 0;
+
+        // build the filter / search databases from the members set above
+        doPrep();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+// template=self constructor with a byGroup flag.
+// Per the original note, byGroup is used by the MPI-enabled version so the template
+// is read as MPI_COMM_SELF instead of MPI_COMM_WORLD; here it is stored before
+// readSeqs() runs. The query file `file` itself is read as the template, `prior`
+// is copied into priority, and the remaining arguments are stored in their members.
+// A filter is then created from templateSeqs, and for the "distance" search method
+// a filtered copy of every template sequence is appended to filteredTemplateSeqs
+// (the copy loop stops early once m->control_pressed is set).
+// A std::exception is reported through m->errorOut() followed by exit(1).
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
+                             int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() {
+    try {
+        // must be in place before the sequences are read
+        byGroup = bg;
+
+        // the query file doubles as the template
+        fastafile = file;
+        templateSeqs = readSeqs(fastafile);
+        templateFileName = temp;
+        priority = prior;
+
+        // search set-up
+        searchMethod = mode;
+        kmerSize = k;
+        blastlocation = blas;
+        threadID = tid;
+
+        // scoring and alignment settings
+        match = ms;
+        misMatch = mms;
+        realign = r;
+        trimChimera = trim;
+
+        // windowing and thresholds
+        window = win;
+        increment = inc;
+        divR = div;
+        minSim = minsim;
+        minCov = mincov;
+        minBS = minbs;
+        minSNP = minsnp;
+
+        // parent search limits
+        parents = par;
+        iters = it;
+        numWanted = numw;
+
+        numNoParents = 0;
+
+        //just removed columns where all seqs have a gap
+        createFilter(templateSeqs, 0.0);
+
+        if (searchMethod == "distance") {
+            // keep a filtered copy of each template sequence; stop as soon as the user cancels
+            for (size_t idx = 0; idx < templateSeqs.size() && !m->control_pressed; ++idx) {
+                Sequence* filteredCopy = new Sequence(templateSeqs[idx]->getName(), templateSeqs[idx]->getAligned());
+                runFilter(filteredCopy);
+                filteredTemplateSeqs.push_back(filteredCopy);
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+// template=self constructor (no byGroup argument).
+// The query file `file` is read as the template, `prior` is copied into priority,
+// and the remaining arguments are stored in their members. A filter is created
+// from templateSeqs, and for the "distance" search method a filtered copy of every
+// template sequence is appended to filteredTemplateSeqs (the copy loop stops early
+// once m->control_pressed is set).
+// A std::exception is reported through m->errorOut() followed by exit(1).
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
+                             int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
+    try {
+        // the query file doubles as the template
+        fastafile = file;
+        templateSeqs = readSeqs(fastafile);
+        templateFileName = temp;
+        priority = prior;
+
+        // search set-up
+        searchMethod = mode;
+        kmerSize = k;
+        blastlocation = blas;
+        threadID = tid;
+
+        // scoring and alignment settings
+        match = ms;
+        misMatch = mms;
+        realign = r;
+        trimChimera = trim;
+
+        // windowing and thresholds
+        window = win;
+        increment = inc;
+        divR = div;
+        minSim = minsim;
+        minCov = mincov;
+        minBS = minbs;
+        minSNP = minsnp;
+
+        // parent search limits
+        parents = par;
+        iters = it;
+        numWanted = numw;
+
+        numNoParents = 0;
+
+        //just removed columns where all seqs have a gap
+        createFilter(templateSeqs, 0.0);
+
+        if (searchMethod == "distance") {
+            // keep a filtered copy of each template sequence; stop as soon as the user cancels
+            for (size_t idx = 0; idx < templateSeqs.size() && !m->control_pressed; ++idx) {
+                Sequence* filteredCopy = new Sequence(templateSeqs[idx]->getName(), templateSeqs[idx]->getAligned());
+                runFilter(filteredCopy);
+                filteredTemplateSeqs.push_back(filteredCopy);
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+// Prepares the structures needed by the configured searchMethod:
+//   "distance" - creates a filter from the template plus the query sequences and
+//                stores a filtered copy of every template sequence;
+//   "kmer"     - creates databaseRight/databaseLeft from the last and first parts
+//                of each unaligned template sequence (outside USE_MPI builds an
+//                existing database file that passes checkReleaseVersion is read
+//                instead of being regenerated);
+//   "blast"    - creates databaseLeft as a BlastDB holding all template sequences.
+// Always returns 0, including when m->control_pressed cuts the work short.
+// A std::exception is reported through m->errorOut() followed by exit(1).
+int ChimeraSlayer::doPrep() {
+    try {
+        if (searchMethod == "distance") {
+            //read in all query seqs
+            vector<Sequence*> querySeqs = readSeqs(fastafile);
+
+            // the filter is built over template and query sequences together
+            vector<Sequence*> combined = templateSeqs;
+            combined.insert(combined.end(), querySeqs.begin(), querySeqs.end());
+
+            createFilter(combined, 0.0); //just removed columns where all seqs have a gap
+
+            // the query sequences were only needed to build the filter
+            for (size_t q = 0; q < querySeqs.size(); q++) { delete querySeqs[q]; }
+
+            if (m->control_pressed) { return 0; }
+
+            //run filter on template copying templateSeqs into filteredTemplateSeqs
+            for (size_t t = 0; t < templateSeqs.size(); t++) {
+                if (m->control_pressed) { return 0; }
+
+                Sequence* filteredCopy = new Sequence(templateSeqs[t]->getName(), templateSeqs[t]->getAligned());
+                runFilter(filteredCopy);
+                filteredTemplateSeqs.push_back(filteredCopy);
+            }
+        }
+
+        //generate the kmerdb to pass to maligner
+        if (searchMethod == "kmer") {
+            string templatePath = m->hasPath(templateFileName);
+
+            string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
+            databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
+
+            string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
+            databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
+#ifdef USE_MPI
+            // this build never looks for an existing database file: both sides are generated
+            for (size_t t = 0; t < templateSeqs.size(); t++) {
+                if (m->control_pressed) { return 0; }
+
+                // leading 33% of the unaligned sequence
+                const string whole = templateSeqs[t]->getUnaligned();
+                const string leftPiece = whole.substr(0, int(whole.length() * 0.33));
+
+                Sequence leftSeq(templateSeqs[t]->getName(), leftPiece);
+                databaseLeft->addSequence(leftSeq);
+            }
+            databaseLeft->generateDB();
+            databaseLeft->setNumSeqs(templateSeqs.size());
+
+            for (size_t t = 0; t < templateSeqs.size(); t++) {
+                if (m->control_pressed) { return 0; }
+
+                // everything from the 66% mark to the end of the unaligned sequence
+                const string whole = templateSeqs[t]->getUnaligned();
+                const string rightPiece = whole.substr(int(whole.length() * 0.66));
+
+                Sequence rightSeq(templateSeqs[t]->getName(), rightPiece);
+                databaseRight->addSequence(rightSeq);
+            }
+            databaseRight->generateDB();
+            databaseRight->setNumSeqs(templateSeqs.size());
+
+#else
+            //leftside
+            string kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+            ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
+
+            // regenerate unless the file opened and passes the release-version check
+            bool needToGenerateLeft = true;
+            if (kmerFileTestLeft) {
+                needToGenerateLeft = !m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
+            }
+
+            if (!needToGenerateLeft) {
+                databaseLeft->readKmerDB(kmerFileTestLeft);
+            }else {
+                for (size_t t = 0; t < templateSeqs.size(); t++) {
+                    if (m->control_pressed) { return 0; }
+
+                    // leading 33% of the unaligned sequence
+                    const string whole = templateSeqs[t]->getUnaligned();
+                    const string leftPiece = whole.substr(0, int(whole.length() * 0.33));
+
+                    Sequence leftSeq(templateSeqs[t]->getName(), leftPiece);
+                    databaseLeft->addSequence(leftSeq);
+                }
+                databaseLeft->generateDB();
+            }
+            kmerFileTestLeft.close();
+
+            databaseLeft->setNumSeqs(templateSeqs.size());
+
+            //rightside
+            string kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+            ifstream kmerFileTestRight(kmerDBNameRight.c_str());
+
+            // regenerate unless the file opened and passes the release-version check
+            bool needToGenerateRight = true;
+            if (kmerFileTestRight) {
+                needToGenerateRight = !m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
+            }
+
+            if (!needToGenerateRight) {
+                databaseRight->readKmerDB(kmerFileTestRight);
+            }else {
+                for (size_t t = 0; t < templateSeqs.size(); t++) {
+                    if (m->control_pressed) { return 0; }
+
+                    // everything from the 66% mark to the end of the unaligned sequence
+                    const string whole = templateSeqs[t]->getUnaligned();
+                    const string rightPiece = whole.substr(int(whole.length() * 0.66));
+
+                    Sequence rightSeq(templateSeqs[t]->getName(), rightPiece);
+                    databaseRight->addSequence(rightSeq);
+                }
+                databaseRight->generateDB();
+            }
+            kmerFileTestRight.close();
+
+            databaseRight->setNumSeqs(templateSeqs.size());
+#endif
+        }else if (searchMethod == "blast") {
+
+            //generate blastdb
+            databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
+
+            if (m->control_pressed) { return 0; }
+
+            for (size_t t = 0; t < templateSeqs.size(); t++) { databaseLeft->addSequence(*templateSeqs[t]); }
+            databaseLeft->generateDB();
+            databaseLeft->setNumSeqs(templateSeqs.size());
+        }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraSlayer", "doprep");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+// Builds the reference set for query `q` when the template is the query file itself.
+// Walks templateSeqs in order and collects every sequence whose abundance (looked up
+// in priority) is strictly greater than q's and whose name is not in chimericSeqs;
+// the walk stops at the first sequence that is not more abundant than q. A name
+// missing from priority counts as abundance 0.
+//   q                    - the query sequence; only its name is used here
+//   userTemplateFiltered - for searchMethod "distance", receives the entries of
+//                          filteredTemplateSeqs that match the returned sequences
+// Returns the collected sequences (pointers into templateSeqs, not copies).
+// When the result is non-empty, databaseLeft (and databaseRight for "kmer") are
+// rebuilt from it for the "kmer" and "blast" search methods.
+// If m->control_pressed is set, whatever has been collected so far is returned at once.
+// A std::exception is reported through m->errorOut() followed by exit(1).
+// NOTE(review): databaseLeft/databaseRight are allocated with new on every call;
+// presumably the caller releases them between calls - confirm.
+vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
+    try {
+
+        //when template=self, the query file is sorted from most abundant to least abundant
+        //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
+        vector<Sequence*> userTemplate;
+
+        //abundances are read with find() rather than operator[] so that an unknown name
+        //reads as 0 without being inserted into priority as a side effect
+        map<string, int>::const_iterator abundIt = priority.find(q.getName());
+        const int myAbund = (abundIt == priority.end()) ? 0 : abundIt->second;
+
+        for (size_t i = 0; i < templateSeqs.size(); i++) {
+
+            if (m->control_pressed) { return userTemplate; }
+
+            //have I reached a sequence that is no more abundant than myself?
+            abundIt = priority.find(templateSeqs[i]->getName());
+            const int candidateAbund = (abundIt == priority.end()) ? 0 : abundIt->second;
+            if (candidateAbund <= myAbund) { break; }
+
+            //if it is not chimeric, add it
+            if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
+                userTemplate.push_back(templateSeqs[i]);
+                if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
+            }
+        }
+
+        //avoids nuisance error from formatdb for making blank blast database
+        if (userTemplate.size() == 0) {
+            return userTemplate;
+        }
+
+        //generate the kmerdb to pass to maligner
+        if (searchMethod == "kmer") {
+            string templatePath = m->hasPath(templateFileName);
+            string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
+            databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
+
+            string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
+            databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
+
+            //the USE_MPI and non-MPI builds ran identical code here, so a single copy is kept
+
+            //left database: leading 33% of each unaligned sequence
+            for (size_t i = 0; i < userTemplate.size(); i++) {
+
+                if (m->control_pressed) { return userTemplate; }
+
+                string leftFrag = userTemplate[i]->getUnaligned();
+                leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
+
+                Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
+                databaseLeft->addSequence(leftTemp);
+            }
+            databaseLeft->generateDB();
+            databaseLeft->setNumSeqs(userTemplate.size());
+
+            //right database: from the 66% mark to the end of each unaligned sequence
+            for (size_t i = 0; i < userTemplate.size(); i++) {
+                if (m->control_pressed) { return userTemplate; }
+
+                string rightFrag = userTemplate[i]->getUnaligned();
+                rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
+
+                Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
+                databaseRight->addSequence(rightTemp);
+            }
+            databaseRight->generateDB();
+            databaseRight->setNumSeqs(userTemplate.size());
+
+        }else if (searchMethod == "blast") {
+
+            //generate blastdb
+            databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
+
+            if (m->control_pressed) { return userTemplate; }
+
+            for (size_t i = 0; i < userTemplate.size(); i++) {
+                if (m->control_pressed) { return userTemplate; }
+                databaseLeft->addSequence(*userTemplate[i]);
+            }
+            databaseLeft->generateDB();
+            databaseLeft->setNumSeqs(userTemplate.size());
+        }
+
+        return userTemplate;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraSlayer", "getTemplate");
+        exit(1);
+    }
+}
+