5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div,
16 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
19 templateFileName = temp; templateSeqs = readSeqs(temp);
36 decalc = new DeCalculator();
41 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45 //***************************************************************************************************************
46 int ChimeraSlayer::doPrep() {
50 //read in all query seqs
51 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
53 vector<Sequence*> temp = templateSeqs;
54 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
56 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
58 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
60 if (m->control_pressed) { return 0; }
62 //run filter on template
63 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
65 string kmerDBNameLeft;
66 string kmerDBNameRight;
68 //generate the kmerdb to pass to maligner
69 if (searchMethod == "kmer") {
70 string rightTemplateFileName = "right." + templateFileName;
71 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
73 string leftTemplateFileName = "left." + templateFileName;
74 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
76 for (int i = 0; i < templateSeqs.size(); i++) {
78 if (m->control_pressed) { return 0; }
80 string leftFrag = templateSeqs[i]->getUnaligned();
81 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
83 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
84 databaseLeft->addSequence(leftTemp);
86 databaseLeft->generateDB();
87 databaseLeft->setNumSeqs(templateSeqs.size());
89 for (int i = 0; i < templateSeqs.size(); i++) {
90 if (m->control_pressed) { return 0; }
92 string rightFrag = templateSeqs[i]->getUnaligned();
93 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
95 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
96 databaseRight->addSequence(rightTemp);
98 databaseRight->generateDB();
99 databaseRight->setNumSeqs(templateSeqs.size());
103 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
104 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
106 if(!kmerFileTestLeft){
108 for (int i = 0; i < templateSeqs.size(); i++) {
110 if (m->control_pressed) { return 0; }
112 string leftFrag = templateSeqs[i]->getUnaligned();
113 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
115 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
116 databaseLeft->addSequence(leftTemp);
118 databaseLeft->generateDB();
121 databaseLeft->readKmerDB(kmerFileTestLeft);
123 kmerFileTestLeft.close();
125 databaseLeft->setNumSeqs(templateSeqs.size());
128 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
129 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
131 if(!kmerFileTestRight){
133 for (int i = 0; i < templateSeqs.size(); i++) {
134 if (m->control_pressed) { return 0; }
136 string rightFrag = templateSeqs[i]->getUnaligned();
137 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
139 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
140 databaseRight->addSequence(rightTemp);
142 databaseRight->generateDB();
145 databaseRight->readKmerDB(kmerFileTestRight);
147 kmerFileTestRight.close();
149 databaseRight->setNumSeqs(templateSeqs.size());
156 catch(exception& e) {
157 m->errorOut(e, "ChimeraSlayer", "doprep");
161 //***************************************************************************************************************
162 ChimeraSlayer::~ChimeraSlayer() { delete decalc; if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } }
163 //***************************************************************************************************************
164 void ChimeraSlayer::printHeader(ostream& out) {
165 m->mothurOutEndLine();
166 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
167 m->mothurOutEndLine();
169 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
171 //***************************************************************************************************************
172 int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
174 if (chimeraFlags == "yes") {
175 string chimeraFlag = "no";
176 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
178 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
181 if (chimeraFlag == "yes") {
182 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
183 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
184 outAcc << querySeq->getName() << endl;
188 printBlock(chimeraResults[0], out);
190 }else { out << querySeq->getName() << "\tno" << endl; }
195 catch(exception& e) {
196 m->errorOut(e, "ChimeraSlayer", "print");
201 //***************************************************************************************************************
202 int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
205 bool results = false;
206 string outAccString = "";
207 string outputString = "";
209 if (chimeraFlags == "yes") {
210 string chimeraFlag = "no";
211 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
213 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
216 if (chimeraFlag == "yes") {
217 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
218 cout << querySeq->getName() << "\tyes" << endl;
219 outAccString += querySeq->getName() + "\n";
222 //write to accnos file
223 int length = outAccString.length();
225 strcpy(buf2, outAccString.c_str());
227 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
231 outputString = getBlock(chimeraResults[0]);
232 outputString += "\n";
234 }else { outputString += querySeq->getName() + "\tno\n"; }
236 //write to output file
237 int length = outputString.length();
239 strcpy(buf, outputString.c_str());
241 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
245 catch(exception& e) {
246 m->errorOut(e, "ChimeraSlayer", "print");
252 //***************************************************************************************************************
253 int ChimeraSlayer::getChimeras(Sequence* query) {
258 spotMap = runFilter(query);
262 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
263 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
264 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
266 if (m->control_pressed) { return 0; }
268 string chimeraFlag = maligner->getResults(query, decalc);
269 if (m->control_pressed) { return 0; }
270 vector<results> Results = maligner->getOutput();
272 //found in testing realigning only made things worse
274 ChimeraReAligner realigner(templateSeqs, match, misMatch);
275 realigner.reAlign(query, Results);
278 if (chimeraFlag == "yes") {
280 //get sequence that were given from maligner results
281 vector<SeqDist> seqs;
282 map<string, float> removeDups;
283 map<string, float>::iterator itDup;
284 map<string, string> parentNameSeq;
285 map<string, string>::iterator itSeq;
286 for (int j = 0; j < Results.size(); j++) {
287 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
288 //only add if you are not a duplicate
289 itDup = removeDups.find(Results[j].parent);
290 if (itDup == removeDups.end()) { //this is not duplicate
291 removeDups[Results[j].parent] = dist;
292 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
293 }else if (dist > itDup->second) { //is this a stronger number for this parent
294 removeDups[Results[j].parent] = dist;
295 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
299 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
300 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
301 itSeq = parentNameSeq.find(itDup->first);
302 //cout << itDup->first << itSeq->second << endl;
303 Sequence* seq = new Sequence(itDup->first, itSeq->second);
307 member.dist = itDup->second;
309 seqs.push_back(member);
312 //limit number of parents to explore - default 3
313 if (Results.size() > parents) {
315 sort(seqs.begin(), seqs.end(), compareSeqDist);
316 //prioritize larger more similiar sequence fragments
317 reverse(seqs.begin(), seqs.end());
319 for (int k = seqs.size()-1; k > (parents-1); k--) {
325 //put seqs into vector to send to slayer
326 vector<Sequence*> seqsForSlayer;
327 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
329 //mask then send to slayer...
331 decalc->setMask(seqMask);
334 decalc->runMask(query);
337 for (int k = 0; k < seqsForSlayer.size(); k++) {
338 decalc->runMask(seqsForSlayer[k]);
341 spotMap = decalc->getMaskMap();
344 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
347 chimeraFlags = slayer->getResults(query, seqsForSlayer);
348 if (m->control_pressed) { return 0; }
349 chimeraResults = slayer->getOutput();
352 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
360 catch(exception& e) {
361 m->errorOut(e, "ChimeraSlayer", "getChimeras");
365 //***************************************************************************************************************
366 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
370 out << querySeq->getName() << '\t';
371 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
372 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
373 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
375 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
376 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
378 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
380 //out << "Similarity of parents: " << data.ab << endl;
381 //out << "Similarity of query to parentA: " << data.qa << endl;
382 //out << "Similarity of query to parentB: " << data.qb << endl;
385 //out << "Per_id(QL,A): " << data.qla << endl;
386 //out << "Per_id(QL,B): " << data.qlb << endl;
387 //out << "Per_id(QR,A): " << data.qra << endl;
388 //out << "Per_id(QR,B): " << data.qrb << endl;
391 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
392 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
395 catch(exception& e) {
396 m->errorOut(e, "ChimeraSlayer", "printBlock");
400 //***************************************************************************************************************
401 string ChimeraSlayer::getBlock(data_struct data){
404 string outputString = "";
406 outputString += querySeq->getName() + "\t";
407 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
409 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
410 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
412 outputString += "yes\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
416 catch(exception& e) {
417 m->errorOut(e, "ChimeraSlayer", "getBlock");
421 //***************************************************************************************************************/