]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
8a0cbd067cc6cbfb8ced9fbb3a3cb89ea6fdc62b
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12
13 //***************************************************************************************************************
14 ChimeraSlayer::ChimeraSlayer(string mode, bool r) : searchMethod(mode), realign(r) {    decalc = new DeCalculator();      }
15 //***************************************************************************************************************
16 ChimeraSlayer::~ChimeraSlayer() {       delete decalc;   }
17 //***************************************************************************************************************
18 void ChimeraSlayer::printHeader(ostream& out) {
19         mothurOutEndLine();
20         mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
21         mothurOutEndLine();
22 }
23 //***************************************************************************************************************
24 void ChimeraSlayer::print(ostream& out) {
25         try {
26                 if (chimeraFlags == "yes") {
27                         string chimeraFlag = "no";
28                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
29                            ||
30                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
31                         
32                         
33                         if (chimeraFlag == "yes") {     
34                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
35                                         mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
36                                 }
37                         }
38                         out << querySeq->getName() << "\tyes" << endl;
39                         printBlock(chimeraResults[0], out);
40                         out << endl;
41                 }else {  out << querySeq->getName() << "\tno" << endl;  }
42                 
43         }
44         catch(exception& e) {
45                 errorOut(e, "ChimeraSlayer", "print");
46                 exit(1);
47         }
48 }
49 //***************************************************************************************************************
50 int ChimeraSlayer::getChimeras(Sequence* query) {
51         try {
52                 chimeraFlags = "no";
53                 querySeq = query;
54                 
55                 for (int i = 0; i < query->getAligned().length(); i++) {
56                         spotMap[i] = i;
57                 }
58                 
59                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
60                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod);
61                 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
62                 
63                 string chimeraFlag = maligner->getResults(query, decalc);
64                 vector<results> Results = maligner->getOutput();
65
66                 //realign query to parents to improve slayers detection rate???
67                 if (realign) {
68                         ChimeraReAligner realigner(templateSeqs, match, misMatch);
69                         realigner.reAlign(query, Results);
70                 }
71
72                         //if (chimeraFlag == "yes") {
73                         
74                 //get sequence that were given from maligner results
75                 vector<SeqDist> seqs;
76                 map<string, float> removeDups;
77                 map<string, float>::iterator itDup;
78                 for (int j = 0; j < Results.size(); j++) {
79                         float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
80                         //only add if you are not a duplicate
81                         itDup = removeDups.find(Results[j].parent);
82                         if (itDup == removeDups.end()) { //this is not duplicate
83                                 removeDups[Results[j].parent] = dist;
84                         }else if (dist > itDup->second) { //is this a stronger number for this parent
85                                 removeDups[Results[j].parent] = dist;
86                         }
87                 }
88                 
89                 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
90                         Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
91                         
92                         SeqDist member;
93                         member.seq = seq;
94                         member.dist = itDup->second;
95                         
96                         seqs.push_back(member);
97                 }
98                 
99                 //limit number of parents to explore - default 3
100                 if (Results.size() > parents) {
101                         //sort by distance
102                         sort(seqs.begin(), seqs.end(), compareSeqDist);
103                         //prioritize larger more similiar sequence fragments
104                         reverse(seqs.begin(), seqs.end());
105                         
106                         for (int k = seqs.size()-1; k > (parents-1); k--)  {  
107                                 delete seqs[k].seq;
108                                 seqs.pop_back();        
109                         }
110                 }
111                 
112                 //put seqs into vector to send to slayer
113                 vector<Sequence*> seqsForSlayer;
114                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
115                 //cout << i+1 << "num parents = " << seqsForSlayer.size() << '\t' << chimeraFlag << endl;
116 //ofstream out;
117 //string name = querySeqs[i]->getName();
118 //cout << name << endl;
119 //name = name.substr(name.find_first_of("|")+1);
120 //cout << name << endl;
121 //name = name.substr(name.find_first_of("|")+1);
122 //cout << name << endl;
123 //name = name.substr(0, name.find_last_of("|"));
124 //cout << name << endl;
125 //string filename = toString(i+1) + ".seqsforslayer";
126 //openOutputFile(filename, out);        
127 //cout << querySeqs[i]->getName() << endl;
128 //for (int u = 0; u < seqsForSlayer.size(); u++) { cout << seqsForSlayer[u]->getName() << '\t'; seqsForSlayer[u]->printSequence(out);   }
129 //cout << endl;
130 //out.close();
131 //filename = toString(i+1) + ".fasta";
132 //openOutputFile(filename, out);        
133 //querySeqs[i]->printSequence(out);
134 //out.close();
135
136
137                         
138                 //mask then send to slayer...
139                 if (seqMask != "") {
140                         decalc->setMask(seqMask);
141                         
142                         //mask querys
143                         decalc->runMask(query);
144                         
145                         //mask parents
146                         for (int k = 0; k < seqsForSlayer.size(); k++) {
147                                 decalc->runMask(seqsForSlayer[k]);
148                         }
149                         
150                         spotMap = decalc->getMaskMap();
151                 }
152                 
153                 //send to slayer
154                 chimeraFlags = slayer->getResults(query, seqsForSlayer);
155                 chimeraResults = slayer->getOutput();
156         
157                 //free memory
158                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
159                 //}
160                         
161                 return 0;
162         }
163         catch(exception& e) {
164                 errorOut(e, "ChimeraSlayer", "getChimeras");
165                 exit(1);
166         }
167 }
168 //***************************************************************************************************************
169 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
170         try {
171                 
172                 out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
173                 out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
174                 out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
175                 
176                 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
177                 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
178                 
179                 out << "Similarity of parents: " << data.ab << endl;
180                 out << "Similarity of query to parentA: " << data.qa << endl;
181                 out << "Similarity of query to parentB: " << data.qb << endl;
182                 
183                 out << "Percent_ID QLA_QRB: " << data.qla_qrb << endl;
184                 out << "Percent_ID QLB_QRA: " << data.qlb_qra << endl;
185                 //out << "Per_id(QL,A): " << data.qla << endl;
186                 //out << "Per_id(QL,B): " << data.qlb << endl;
187                 //out << "Per_id(QR,A): " << data.qra << endl;
188                 //out << "Per_id(QR,B): " << data.qrb << endl;
189
190                 
191                 out << "DeltaL: " << (data.qla - data.qlb) << endl;
192                 out << "DeltaR: " << (data.qra - data.qrb) << endl;
193
194         }
195         catch(exception& e) {
196                 errorOut(e, "ChimeraSlayer", "printBlock");
197                 exit(1);
198         }
199 }
200 //***************************************************************************************************************
201