5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {
16 decalc = new DeCalculator();
18 //***************************************************************************************************************
19 int ChimeraSlayer::doPrep() {
22 string kmerDBNameLeft;
23 string kmerDBNameRight;
25 //generate the kmerdb to pass to maligner
26 if (searchMethod == "kmer") {
28 string leftTemplateFileName = "left." + templateFileName;
29 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
30 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
31 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
33 if(!kmerFileTestLeft){
35 for (int i = 0; i < templateSeqs.size(); i++) {
37 if (m->control_pressed) { return 0; }
39 string leftFrag = templateSeqs[i]->getUnaligned();
40 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
42 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
43 databaseLeft->addSequence(leftTemp);
45 databaseLeft->generateDB();
48 databaseLeft->readKmerDB(kmerFileTestLeft);
50 kmerFileTestLeft.close();
52 databaseLeft->setNumSeqs(templateSeqs.size());
55 string rightTemplateFileName = "right." + templateFileName;
56 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
57 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
58 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
60 if(!kmerFileTestRight){
62 for (int i = 0; i < templateSeqs.size(); i++) {
63 if (m->control_pressed) { return 0; }
65 string rightFrag = templateSeqs[i]->getUnaligned();
66 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
68 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
69 databaseRight->addSequence(rightTemp);
71 databaseRight->generateDB();
74 databaseRight->readKmerDB(kmerFileTestRight);
76 kmerFileTestRight.close();
78 databaseRight->setNumSeqs(templateSeqs.size());
82 int start = time(NULL);
83 //filter the sequences
84 //read in all query seqs
86 openInputFile(fastafile, in);
88 vector<Sequence*> tempQuerySeqs;
90 if (m->control_pressed) { for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } return 0; }
92 Sequence* s = new Sequence(in);
95 if (s->getName() != "") { tempQuerySeqs.push_back(s); }
99 vector<Sequence*> temp = templateSeqs;
100 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
102 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
104 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
106 if (m->control_pressed) { return 0; }
109 //run filter on template
110 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
112 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to filter."); m->mothurOutEndLine();
117 catch(exception& e) {
118 m->errorOut(e, "ChimeraSlayer", "doprep");
122 //***************************************************************************************************************
123 ChimeraSlayer::~ChimeraSlayer() { delete decalc; if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } }
124 //***************************************************************************************************************
125 void ChimeraSlayer::printHeader(ostream& out) {
126 m->mothurOutEndLine();
127 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
128 m->mothurOutEndLine();
130 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
132 //***************************************************************************************************************
133 int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
135 if (chimeraFlags == "yes") {
136 string chimeraFlag = "no";
137 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
139 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
142 if (chimeraFlag == "yes") {
143 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
144 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
145 outAcc << querySeq->getName() << endl;
149 printBlock(chimeraResults[0], out);
151 }else { out << querySeq->getName() << "\tno" << endl; }
156 catch(exception& e) {
157 m->errorOut(e, "ChimeraSlayer", "print");
161 //***************************************************************************************************************
162 int ChimeraSlayer::getChimeras(Sequence* query) {
167 spotMap = runFilter(query);
171 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
172 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
173 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
175 if (m->control_pressed) { return 0; }
177 string chimeraFlag = maligner->getResults(query, decalc);
178 if (m->control_pressed) { return 0; }
179 vector<results> Results = maligner->getOutput();
181 //found in testing realigning only made things worse
183 ChimeraReAligner realigner(templateSeqs, match, misMatch);
184 realigner.reAlign(query, Results);
187 if (chimeraFlag == "yes") {
189 //get sequence that were given from maligner results
190 vector<SeqDist> seqs;
191 map<string, float> removeDups;
192 map<string, float>::iterator itDup;
193 map<string, string> parentNameSeq;
194 map<string, string>::iterator itSeq;
195 for (int j = 0; j < Results.size(); j++) {
196 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
197 //only add if you are not a duplicate
198 itDup = removeDups.find(Results[j].parent);
199 if (itDup == removeDups.end()) { //this is not duplicate
200 removeDups[Results[j].parent] = dist;
201 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
202 }else if (dist > itDup->second) { //is this a stronger number for this parent
203 removeDups[Results[j].parent] = dist;
204 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
208 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
209 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
210 itSeq = parentNameSeq.find(itDup->first);
211 //cout << itDup->first << itSeq->second << endl;
212 Sequence* seq = new Sequence(itDup->first, itSeq->second);
216 member.dist = itDup->second;
218 seqs.push_back(member);
221 //limit number of parents to explore - default 3
222 if (Results.size() > parents) {
224 sort(seqs.begin(), seqs.end(), compareSeqDist);
225 //prioritize larger more similiar sequence fragments
226 reverse(seqs.begin(), seqs.end());
228 for (int k = seqs.size()-1; k > (parents-1); k--) {
234 //put seqs into vector to send to slayer
235 vector<Sequence*> seqsForSlayer;
236 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
238 //mask then send to slayer...
240 decalc->setMask(seqMask);
243 decalc->runMask(query);
246 for (int k = 0; k < seqsForSlayer.size(); k++) {
247 decalc->runMask(seqsForSlayer[k]);
250 spotMap = decalc->getMaskMap();
253 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
256 chimeraFlags = slayer->getResults(query, seqsForSlayer);
257 if (m->control_pressed) { return 0; }
258 chimeraResults = slayer->getOutput();
261 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
269 catch(exception& e) {
270 m->errorOut(e, "ChimeraSlayer", "getChimeras");
274 //***************************************************************************************************************
275 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
277 //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
279 out << querySeq->getName() << '\t';
280 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
281 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
282 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
284 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
285 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
287 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
289 //out << "Similarity of parents: " << data.ab << endl;
290 //out << "Similarity of query to parentA: " << data.qa << endl;
291 //out << "Similarity of query to parentB: " << data.qb << endl;
294 //out << "Per_id(QL,A): " << data.qla << endl;
295 //out << "Per_id(QL,B): " << data.qlb << endl;
296 //out << "Per_id(QR,A): " << data.qra << endl;
297 //out << "Per_id(QR,B): " << data.qrb << endl;
300 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
301 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
304 catch(exception& e) {
305 m->errorOut(e, "ChimeraSlayer", "printBlock");
309 //***************************************************************************************************************