5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {
16 decalc = new DeCalculator();
18 //***************************************************************************************************************
19 void ChimeraSlayer::doPrep() {
22 string kmerDBNameLeft;
23 string kmerDBNameRight;
25 //generate the kmerdb to pass to maligner
26 if (searchMethod == "kmer") {
28 string leftTemplateFileName = "left." + templateFileName;
29 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
30 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
31 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
33 if(!kmerFileTestLeft){
35 for (int i = 0; i < templateSeqs.size(); i++) {
36 string leftFrag = templateSeqs[i]->getUnaligned();
37 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
39 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
40 databaseLeft->addSequence(leftTemp);
42 databaseLeft->generateDB();
45 databaseLeft->readKmerDB(kmerFileTestLeft);
47 kmerFileTestLeft.close();
49 databaseLeft->setNumSeqs(templateSeqs.size());
52 string rightTemplateFileName = "right." + templateFileName;
53 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
54 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
55 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
57 if(!kmerFileTestRight){
59 for (int i = 0; i < templateSeqs.size(); i++) {
60 string rightFrag = templateSeqs[i]->getUnaligned();
61 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
63 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
64 databaseRight->addSequence(rightTemp);
66 databaseRight->generateDB();
69 databaseRight->readKmerDB(kmerFileTestRight);
71 kmerFileTestRight.close();
73 databaseRight->setNumSeqs(templateSeqs.size());
77 int start = time(NULL);
78 //filter the sequences
79 //read in all query seqs
81 openInputFile(fastafile, in);
83 vector<Sequence*> tempQuerySeqs;
85 Sequence* s = new Sequence(in);
88 if (s->getName() != "") { tempQuerySeqs.push_back(s); }
92 vector<Sequence*> temp = templateSeqs;
93 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
95 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
99 //run filter on template
100 for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); }
102 mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to filter."); mothurOutEndLine();
105 catch(exception& e) {
106 errorOut(e, "ChimeraSlayer", "doprep");
110 //***************************************************************************************************************
111 ChimeraSlayer::~ChimeraSlayer() { delete decalc; if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; } }
112 //***************************************************************************************************************
113 void ChimeraSlayer::printHeader(ostream& out) {
115 mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
118 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
120 //***************************************************************************************************************
121 void ChimeraSlayer::print(ostream& out, ostream& outAcc) {
123 if (chimeraFlags == "yes") {
124 string chimeraFlag = "no";
125 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
127 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
130 if (chimeraFlag == "yes") {
131 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
132 mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
133 outAcc << querySeq->getName() << endl;
137 printBlock(chimeraResults[0], out);
139 }else { out << querySeq->getName() << "\tno" << endl; }
142 catch(exception& e) {
143 errorOut(e, "ChimeraSlayer", "print");
147 //***************************************************************************************************************
148 int ChimeraSlayer::getChimeras(Sequence* query) {
153 spotMap = runFilter(query);
157 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
158 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
159 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
161 string chimeraFlag = maligner->getResults(query, decalc);
162 vector<results> Results = maligner->getOutput();
164 //found in testing realigning only made things worse
166 ChimeraReAligner realigner(templateSeqs, match, misMatch);
167 realigner.reAlign(query, Results);
170 if (chimeraFlag == "yes") {
172 //get sequence that were given from maligner results
173 vector<SeqDist> seqs;
174 map<string, float> removeDups;
175 map<string, float>::iterator itDup;
176 map<string, string> parentNameSeq;
177 map<string, string>::iterator itSeq;
178 for (int j = 0; j < Results.size(); j++) {
179 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
180 //only add if you are not a duplicate
181 itDup = removeDups.find(Results[j].parent);
182 if (itDup == removeDups.end()) { //this is not duplicate
183 removeDups[Results[j].parent] = dist;
184 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
185 }else if (dist > itDup->second) { //is this a stronger number for this parent
186 removeDups[Results[j].parent] = dist;
187 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
191 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
192 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
193 itSeq = parentNameSeq.find(itDup->first);
194 //cout << itDup->first << itSeq->second << endl;
195 Sequence* seq = new Sequence(itDup->first, itSeq->second);
199 member.dist = itDup->second;
201 seqs.push_back(member);
204 //limit number of parents to explore - default 3
205 if (Results.size() > parents) {
207 sort(seqs.begin(), seqs.end(), compareSeqDist);
208 //prioritize larger more similiar sequence fragments
209 reverse(seqs.begin(), seqs.end());
211 for (int k = seqs.size()-1; k > (parents-1); k--) {
217 //put seqs into vector to send to slayer
218 vector<Sequence*> seqsForSlayer;
219 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
221 //mask then send to slayer...
223 decalc->setMask(seqMask);
226 decalc->runMask(query);
229 for (int k = 0; k < seqsForSlayer.size(); k++) {
230 decalc->runMask(seqsForSlayer[k]);
233 spotMap = decalc->getMaskMap();
237 chimeraFlags = slayer->getResults(query, seqsForSlayer);
238 chimeraResults = slayer->getOutput();
241 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
249 catch(exception& e) {
250 errorOut(e, "ChimeraSlayer", "getChimeras");
254 //***************************************************************************************************************
255 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
257 //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
259 out << querySeq->getName() << '\t';
260 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
261 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
262 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
264 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
265 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
267 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
269 //out << "Similarity of parents: " << data.ab << endl;
270 //out << "Similarity of query to parentA: " << data.qa << endl;
271 //out << "Similarity of query to parentB: " << data.qb << endl;
274 //out << "Per_id(QL,A): " << data.qla << endl;
275 //out << "Per_id(QL,B): " << data.qlb << endl;
276 //out << "Per_id(QR,A): " << data.qra << endl;
277 //out << "Per_id(QR,B): " << data.qrb << endl;
280 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
281 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
284 catch(exception& e) {
285 errorOut(e, "ChimeraSlayer", "printBlock");
289 //***************************************************************************************************************