5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div,
16 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
19 templateFileName = temp; templateSeqs = readSeqs(temp);
36 decalc = new DeCalculator();
41 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45 //***************************************************************************************************************
46 int ChimeraSlayer::doPrep() {
50 //read in all query seqs
51 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
53 vector<Sequence*> temp = templateSeqs;
54 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
56 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
58 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
60 if (m->control_pressed) { return 0; }
62 //run filter on template
63 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
65 string kmerDBNameLeft;
66 string kmerDBNameRight;
68 //generate the kmerdb to pass to maligner
69 if (searchMethod == "kmer") {
70 string rightTemplateFileName = "right." + templateFileName;
71 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
73 string leftTemplateFileName = "left." + templateFileName;
74 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
76 for (int i = 0; i < templateSeqs.size(); i++) {
78 if (m->control_pressed) { return 0; }
80 string leftFrag = templateSeqs[i]->getUnaligned();
81 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
83 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
84 databaseLeft->addSequence(leftTemp);
86 databaseLeft->generateDB();
87 databaseLeft->setNumSeqs(templateSeqs.size());
89 for (int i = 0; i < templateSeqs.size(); i++) {
90 if (m->control_pressed) { return 0; }
92 string rightFrag = templateSeqs[i]->getUnaligned();
93 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
95 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
96 databaseRight->addSequence(rightTemp);
98 databaseRight->generateDB();
99 databaseRight->setNumSeqs(templateSeqs.size());
103 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
104 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
106 if(!kmerFileTestLeft){
108 for (int i = 0; i < templateSeqs.size(); i++) {
110 if (m->control_pressed) { return 0; }
112 string leftFrag = templateSeqs[i]->getUnaligned();
113 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
115 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
116 databaseLeft->addSequence(leftTemp);
118 databaseLeft->generateDB();
121 databaseLeft->readKmerDB(kmerFileTestLeft);
123 kmerFileTestLeft.close();
125 databaseLeft->setNumSeqs(templateSeqs.size());
128 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
129 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
131 if(!kmerFileTestRight){
133 for (int i = 0; i < templateSeqs.size(); i++) {
134 if (m->control_pressed) { return 0; }
136 string rightFrag = templateSeqs[i]->getUnaligned();
137 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
139 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
140 databaseRight->addSequence(rightTemp);
142 databaseRight->generateDB();
145 databaseRight->readKmerDB(kmerFileTestRight);
147 kmerFileTestRight.close();
149 databaseRight->setNumSeqs(templateSeqs.size());
156 catch(exception& e) {
157 m->errorOut(e, "ChimeraSlayer", "doprep");
161 //***************************************************************************************************************
162 ChimeraSlayer::~ChimeraSlayer() { delete decalc; if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } }
163 //***************************************************************************************************************
164 void ChimeraSlayer::printHeader(ostream& out) {
165 m->mothurOutEndLine();
166 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
167 m->mothurOutEndLine();
169 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
171 //***************************************************************************************************************
172 int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
174 if (chimeraFlags == "yes") {
175 string chimeraFlag = "no";
176 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
178 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
181 if (chimeraFlag == "yes") {
182 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
183 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
184 outAcc << querySeq->getName() << endl;
188 printBlock(chimeraResults[0], out);
190 }else { out << querySeq->getName() << "\tno" << endl; }
195 catch(exception& e) {
196 m->errorOut(e, "ChimeraSlayer", "print");
201 //***************************************************************************************************************
202 int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
205 bool results = false;
206 string outAccString = "";
207 string outputString = "";
209 if (chimeraFlags == "yes") {
210 string chimeraFlag = "no";
211 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
213 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
216 if (chimeraFlag == "yes") {
217 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
218 cout << querySeq->getName() << "\tyes" << endl;
219 outAccString += querySeq->getName() + "\n";
222 //write to accnos file
223 int length = outAccString.length();
224 char* buf2 = new char[length];
\r
225 memcpy(buf2, outAccString.c_str(), length);
227 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
232 outputString = getBlock(chimeraResults[0]);
233 outputString += "\n";
235 }else { outputString += querySeq->getName() + "\tno\n"; }
237 //write to output file
238 int length = outputString.length();
239 char* buf = new char[length];
\r
240 memcpy(buf, outputString.c_str(), length);
242 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
247 catch(exception& e) {
248 m->errorOut(e, "ChimeraSlayer", "print");
254 //***************************************************************************************************************
255 int ChimeraSlayer::getChimeras(Sequence* query) {
260 spotMap = runFilter(query);
264 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
265 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
266 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
268 if (m->control_pressed) { return 0; }
270 string chimeraFlag = maligner->getResults(query, decalc);
271 if (m->control_pressed) { return 0; }
272 vector<results> Results = maligner->getOutput();
274 //found in testing realigning only made things worse
276 ChimeraReAligner realigner(templateSeqs, match, misMatch);
277 realigner.reAlign(query, Results);
280 if (chimeraFlag == "yes") {
282 //get sequence that were given from maligner results
283 vector<SeqDist> seqs;
284 map<string, float> removeDups;
285 map<string, float>::iterator itDup;
286 map<string, string> parentNameSeq;
287 map<string, string>::iterator itSeq;
288 for (int j = 0; j < Results.size(); j++) {
289 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
290 //only add if you are not a duplicate
291 itDup = removeDups.find(Results[j].parent);
292 if (itDup == removeDups.end()) { //this is not duplicate
293 removeDups[Results[j].parent] = dist;
294 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
295 }else if (dist > itDup->second) { //is this a stronger number for this parent
296 removeDups[Results[j].parent] = dist;
297 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
301 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
302 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
303 itSeq = parentNameSeq.find(itDup->first);
304 //cout << itDup->first << itSeq->second << endl;
305 Sequence* seq = new Sequence(itDup->first, itSeq->second);
309 member.dist = itDup->second;
311 seqs.push_back(member);
314 //limit number of parents to explore - default 3
315 if (Results.size() > parents) {
317 sort(seqs.begin(), seqs.end(), compareSeqDist);
318 //prioritize larger more similiar sequence fragments
319 reverse(seqs.begin(), seqs.end());
321 for (int k = seqs.size()-1; k > (parents-1); k--) {
327 //put seqs into vector to send to slayer
328 vector<Sequence*> seqsForSlayer;
329 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
331 //mask then send to slayer...
333 decalc->setMask(seqMask);
336 decalc->runMask(query);
339 for (int k = 0; k < seqsForSlayer.size(); k++) {
340 decalc->runMask(seqsForSlayer[k]);
343 spotMap = decalc->getMaskMap();
346 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
349 chimeraFlags = slayer->getResults(query, seqsForSlayer);
350 if (m->control_pressed) { return 0; }
351 chimeraResults = slayer->getOutput();
354 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
362 catch(exception& e) {
363 m->errorOut(e, "ChimeraSlayer", "getChimeras");
367 //***************************************************************************************************************
368 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
372 out << querySeq->getName() << '\t';
373 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
374 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
375 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
377 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
378 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
380 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
382 //out << "Similarity of parents: " << data.ab << endl;
383 //out << "Similarity of query to parentA: " << data.qa << endl;
384 //out << "Similarity of query to parentB: " << data.qb << endl;
387 //out << "Per_id(QL,A): " << data.qla << endl;
388 //out << "Per_id(QL,B): " << data.qlb << endl;
389 //out << "Per_id(QR,A): " << data.qra << endl;
390 //out << "Per_id(QR,B): " << data.qrb << endl;
393 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
394 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
397 catch(exception& e) {
398 m->errorOut(e, "ChimeraSlayer", "printBlock");
402 //***************************************************************************************************************
403 string ChimeraSlayer::getBlock(data_struct data){
406 string outputString = "";
408 outputString += querySeq->getName() + "\t";
409 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
411 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
412 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
414 outputString += "yes\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
418 catch(exception& e) {
419 m->errorOut(e, "ChimeraSlayer", "getBlock");
423 //***************************************************************************************************************/