]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
2cbd5d8343f8b96614020d2be5f04250089b73ee
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12
13 //***************************************************************************************************************
14 ChimeraSlayer::ChimeraSlayer(string mode) : searchMethod(mode) {        decalc = new DeCalculator();      }
15 //***************************************************************************************************************
16 ChimeraSlayer::~ChimeraSlayer() {       delete decalc;   }
17 //***************************************************************************************************************
18 void ChimeraSlayer::printHeader(ostream& out) {
19         mothurOutEndLine();
20         mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
21         mothurOutEndLine();
22 }
23 //***************************************************************************************************************
24 void ChimeraSlayer::print(ostream& out) {
25         try {
26                 if (chimeraFlags == "yes") {
27                         string chimeraFlag = "no";
28                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
29                            ||
30                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
31                         
32                         
33                         if (chimeraFlag == "yes") {     
34                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
35                                         mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
36                                 }
37                         }
38                         out << querySeq->getName() << "\tyes" << endl;
39                         printBlock(chimeraResults[0], out);
40                         out << endl;
41                 }else {  out << querySeq->getName() << "\tno" << endl;  }
42                 
43         }
44         catch(exception& e) {
45                 errorOut(e, "ChimeraSlayer", "print");
46                 exit(1);
47         }
48 }
49 //***************************************************************************************************************
50 int ChimeraSlayer::getChimeras(Sequence* query) {
51         try {
52                 chimeraFlags = "no";
53                 querySeq = query;
54                 
55                 for (int i = 0; i < query->getAligned().length(); i++) {
56                         spotMap[i] = i;
57                 }
58                 
59                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
60                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod);
61                 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
62                 
63                 string chimeraFlag = maligner->getResults(query, decalc);
64                 vector<results> Results = maligner->getOutput();
65
66                 //realign query to parents to improve slayers detection rate
67                 ChimeraReAligner realigner(templateSeqs, match, misMatch);
68                 realigner.reAlign(query, Results);
69 cout << query->getName() << '\n' << query->getAligned() << endl;
70                         //if (chimeraFlag == "yes") {
71                         
72                 //get sequence that were given from maligner results
73                 vector<SeqDist> seqs;
74                 map<string, float> removeDups;
75                 map<string, float>::iterator itDup;
76                 for (int j = 0; j < Results.size(); j++) {
77                         float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
78                         //only add if you are not a duplicate
79                         itDup = removeDups.find(Results[j].parent);
80                         if (itDup == removeDups.end()) { //this is not duplicate
81                                 removeDups[Results[j].parent] = dist;
82                         }else if (dist > itDup->second) { //is this a stronger number for this parent
83                                 removeDups[Results[j].parent] = dist;
84                         }
85                 }
86                 
87                 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
88                         Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
89                         
90                         SeqDist member;
91                         member.seq = seq;
92                         member.dist = itDup->second;
93                         
94                         seqs.push_back(member);
95                 }
96                 
97                 //limit number of parents to explore - default 3
98                 if (Results.size() > parents) {
99                         //sort by distance
100                         sort(seqs.begin(), seqs.end(), compareSeqDist);
101                         //prioritize larger more similiar sequence fragments
102                         reverse(seqs.begin(), seqs.end());
103                         
104                         for (int k = seqs.size()-1; k > (parents-1); k--)  {  
105                                 delete seqs[k].seq;
106                                 seqs.pop_back();        
107                         }
108                 }
109                 
110                 //put seqs into vector to send to slayer
111                 vector<Sequence*> seqsForSlayer;
112                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
113                 //cout << i+1 << "num parents = " << seqsForSlayer.size() << '\t' << chimeraFlag << endl;
114 //ofstream out;
115 //string name = querySeqs[i]->getName();
116 //cout << name << endl;
117 //name = name.substr(name.find_first_of("|")+1);
118 //cout << name << endl;
119 //name = name.substr(name.find_first_of("|")+1);
120 //cout << name << endl;
121 //name = name.substr(0, name.find_last_of("|"));
122 //cout << name << endl;
123 //string filename = toString(i+1) + ".seqsforslayer";
124 //openOutputFile(filename, out);        
125 //cout << querySeqs[i]->getName() << endl;
126 //for (int u = 0; u < seqsForSlayer.size(); u++) { cout << seqsForSlayer[u]->getName() << '\t'; seqsForSlayer[u]->printSequence(out);   }
127 //cout << endl;
128 //out.close();
129 //filename = toString(i+1) + ".fasta";
130 //openOutputFile(filename, out);        
131 //querySeqs[i]->printSequence(out);
132 //out.close();
133
134
135                         
136                 //mask then send to slayer...
137                 if (seqMask != "") {
138                         decalc->setMask(seqMask);
139                         
140                         //mask querys
141                         decalc->runMask(query);
142                         
143                         //mask parents
144                         for (int k = 0; k < seqsForSlayer.size(); k++) {
145                                 decalc->runMask(seqsForSlayer[k]);
146                         }
147                         
148                         spotMap = decalc->getMaskMap();
149                 }
150                 
151                 //send to slayer
152                 chimeraFlags = slayer->getResults(query, seqsForSlayer);
153                 chimeraResults = slayer->getOutput();
154         
155                 //free memory
156                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
157                 //}
158                         
159                 return 0;
160         }
161         catch(exception& e) {
162                 errorOut(e, "ChimeraSlayer", "getChimeras");
163                 exit(1);
164         }
165 }
166 //***************************************************************************************************************
167 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
168         try {
169                 
170                 out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
171                 out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
172                 out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
173                 
174                 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
175                 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
176                 
177                 out << "Similarity of parents: " << data.ab << endl;
178                 out << "Similarity of query to parentA: " << data.qa << endl;
179                 out << "Similarity of query to parentB: " << data.qb << endl;
180                 
181                 out << "Percent_ID QLA_QRB: " << data.qla_qrb << endl;
182                 out << "Percent_ID QLB_QRA: " << data.qlb_qra << endl;
183                 //out << "Per_id(QL,A): " << data.qla << endl;
184                 //out << "Per_id(QL,B): " << data.qlb << endl;
185                 //out << "Per_id(QR,A): " << data.qra << endl;
186                 //out << "Per_id(QR,B): " << data.qrb << endl;
187
188                 
189                 out << "DeltaL: " << (data.qla - data.qlb) << endl;
190                 out << "DeltaR: " << (data.qra - data.qrb) << endl;
191
192         }
193         catch(exception& e) {
194                 errorOut(e, "ChimeraSlayer", "printBlock");
195                 exit(1);
196         }
197 }
198 //***************************************************************************************************************
199