5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
37 decalc = new DeCalculator();
42 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
46 //***************************************************************************************************************
47 int ChimeraSlayer::doPrep() {
50 //read in all query seqs
51 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
53 vector<Sequence*> temp = templateSeqs;
54 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
56 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
58 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
60 if (m->control_pressed) { return 0; }
62 //run filter on template
63 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
65 string kmerDBNameLeft;
66 string kmerDBNameRight;
68 //generate the kmerdb to pass to maligner
69 if (searchMethod == "kmer") {
70 string rightTemplateFileName = "right." + templateFileName;
71 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
73 string leftTemplateFileName = "left." + templateFileName;
74 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
76 for (int i = 0; i < templateSeqs.size(); i++) {
78 if (m->control_pressed) { return 0; }
80 string leftFrag = templateSeqs[i]->getUnaligned();
81 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
83 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
84 databaseLeft->addSequence(leftTemp);
86 databaseLeft->generateDB();
87 databaseLeft->setNumSeqs(templateSeqs.size());
89 for (int i = 0; i < templateSeqs.size(); i++) {
90 if (m->control_pressed) { return 0; }
92 string rightFrag = templateSeqs[i]->getUnaligned();
93 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
95 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
96 databaseRight->addSequence(rightTemp);
98 databaseRight->generateDB();
99 databaseRight->setNumSeqs(templateSeqs.size());
103 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
104 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
106 if(!kmerFileTestLeft){
108 for (int i = 0; i < templateSeqs.size(); i++) {
110 if (m->control_pressed) { return 0; }
112 string leftFrag = templateSeqs[i]->getUnaligned();
113 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
115 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
116 databaseLeft->addSequence(leftTemp);
118 databaseLeft->generateDB();
121 databaseLeft->readKmerDB(kmerFileTestLeft);
123 kmerFileTestLeft.close();
125 databaseLeft->setNumSeqs(templateSeqs.size());
128 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
129 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
131 if(!kmerFileTestRight){
133 for (int i = 0; i < templateSeqs.size(); i++) {
134 if (m->control_pressed) { return 0; }
136 string rightFrag = templateSeqs[i]->getUnaligned();
137 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
139 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
140 databaseRight->addSequence(rightTemp);
142 databaseRight->generateDB();
145 databaseRight->readKmerDB(kmerFileTestRight);
147 kmerFileTestRight.close();
149 databaseRight->setNumSeqs(templateSeqs.size());
151 }else if (searchMethod == "blast") {
154 databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
155 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
156 databaseLeft->generateDB();
157 databaseLeft->setNumSeqs(templateSeqs.size());
163 catch(exception& e) {
164 m->errorOut(e, "ChimeraSlayer", "doprep");
168 //***************************************************************************************************************
169 ChimeraSlayer::~ChimeraSlayer() {
171 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
172 else if (searchMethod == "blast") { delete databaseLeft; }
174 //***************************************************************************************************************
175 void ChimeraSlayer::printHeader(ostream& out) {
176 m->mothurOutEndLine();
177 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
178 m->mothurOutEndLine();
180 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
182 //***************************************************************************************************************
183 int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
185 if (chimeraFlags == "yes") {
186 string chimeraFlag = "no";
187 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
189 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
192 if (chimeraFlag == "yes") {
193 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
194 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
195 outAcc << querySeq->getName() << endl;
199 printBlock(chimeraResults[0], chimeraFlag, out);
201 }else { out << querySeq->getName() << "\tno" << endl; }
206 catch(exception& e) {
207 m->errorOut(e, "ChimeraSlayer", "print");
212 //***************************************************************************************************************
213 int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
216 bool results = false;
217 string outAccString = "";
218 string outputString = "";
220 if (chimeraFlags == "yes") {
221 string chimeraFlag = "no";
222 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
224 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
227 if (chimeraFlag == "yes") {
228 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
229 cout << querySeq->getName() << "\tyes" << endl;
230 outAccString += querySeq->getName() + "\n";
233 //write to accnos file
234 int length = outAccString.length();
235 char* buf2 = new char[length];
236 memcpy(buf2, outAccString.c_str(), length);
238 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
243 outputString = getBlock(chimeraResults[0], chimeraFlag);
244 outputString += "\n";
245 //cout << outputString << endl;
246 //write to output file
247 int length = outputString.length();
248 char* buf = new char[length];
249 memcpy(buf, outputString.c_str(), length);
251 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
255 outputString += querySeq->getName() + "\tno\n";
256 //cout << outputString << endl;
257 //write to output file
258 int length = outputString.length();
259 char* buf = new char[length];
260 memcpy(buf, outputString.c_str(), length);
262 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
269 catch(exception& e) {
270 m->errorOut(e, "ChimeraSlayer", "print");
276 //***************************************************************************************************************
277 int ChimeraSlayer::getChimeras(Sequence* query) {
282 spotMap = runFilter(query);
286 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
287 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
288 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
290 if (m->control_pressed) { return 0; }
292 string chimeraFlag = maligner->getResults(query, decalc);
293 if (m->control_pressed) { return 0; }
294 vector<results> Results = maligner->getOutput();
296 //found in testing realigning only made things worse
298 ChimeraReAligner realigner(templateSeqs, match, misMatch);
299 realigner.reAlign(query, Results);
302 if (chimeraFlag == "yes") {
304 //get sequence that were given from maligner results
305 vector<SeqDist> seqs;
306 map<string, float> removeDups;
307 map<string, float>::iterator itDup;
308 map<string, string> parentNameSeq;
309 map<string, string>::iterator itSeq;
310 for (int j = 0; j < Results.size(); j++) {
311 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
312 //only add if you are not a duplicate
313 itDup = removeDups.find(Results[j].parent);
314 if (itDup == removeDups.end()) { //this is not duplicate
315 removeDups[Results[j].parent] = dist;
316 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
317 }else if (dist > itDup->second) { //is this a stronger number for this parent
318 removeDups[Results[j].parent] = dist;
319 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
323 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
324 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
325 itSeq = parentNameSeq.find(itDup->first);
326 //cout << itDup->first << itSeq->second << endl;
327 Sequence* seq = new Sequence(itDup->first, itSeq->second);
331 member.dist = itDup->second;
333 seqs.push_back(member);
336 //limit number of parents to explore - default 3
337 if (Results.size() > parents) {
339 sort(seqs.begin(), seqs.end(), compareSeqDist);
340 //prioritize larger more similiar sequence fragments
341 reverse(seqs.begin(), seqs.end());
343 for (int k = seqs.size()-1; k > (parents-1); k--) {
349 //put seqs into vector to send to slayer
350 vector<Sequence*> seqsForSlayer;
351 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
353 //mask then send to slayer...
355 decalc->setMask(seqMask);
358 decalc->runMask(query);
361 for (int k = 0; k < seqsForSlayer.size(); k++) {
362 decalc->runMask(seqsForSlayer[k]);
365 spotMap = decalc->getMaskMap();
368 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
371 chimeraFlags = slayer->getResults(query, seqsForSlayer);
372 if (m->control_pressed) { return 0; }
373 chimeraResults = slayer->getOutput();
376 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
384 catch(exception& e) {
385 m->errorOut(e, "ChimeraSlayer", "getChimeras");
389 //***************************************************************************************************************
390 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
394 out << querySeq->getName() << '\t';
395 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
396 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
397 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
399 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
400 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
402 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
404 //out << "Similarity of parents: " << data.ab << endl;
405 //out << "Similarity of query to parentA: " << data.qa << endl;
406 //out << "Similarity of query to parentB: " << data.qb << endl;
409 //out << "Per_id(QL,A): " << data.qla << endl;
410 //out << "Per_id(QL,B): " << data.qlb << endl;
411 //out << "Per_id(QR,A): " << data.qra << endl;
412 //out << "Per_id(QR,B): " << data.qrb << endl;
415 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
416 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
419 catch(exception& e) {
420 m->errorOut(e, "ChimeraSlayer", "printBlock");
424 //***************************************************************************************************************
425 string ChimeraSlayer::getBlock(data_struct data, string flag){
428 string outputString = "";
430 outputString += querySeq->getName() + "\t";
431 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
433 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
434 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
436 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
440 catch(exception& e) {
441 m->errorOut(e, "ChimeraSlayer", "getBlock");
445 //***************************************************************************************************************/