]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
chimeras, fix to sabundvector and sharedsabundvector that caused getRabundVector...
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {    
16         decalc = new DeCalculator();    
17 }
18 //***************************************************************************************************************
19 void ChimeraSlayer::doPrep() {
20         try {
21         
22                 string  kmerDBNameLeft;
23                 string  kmerDBNameRight;
24                 
25                 //generate the kmerdb to pass to maligner
26                 if (searchMethod == "kmer") { 
27                         //leftside
28                         string leftTemplateFileName = "left." + templateFileName;
29                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);                      
30                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
31                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
32                         
33                         if(!kmerFileTestLeft){  
34                         
35                                 for (int i = 0; i < templateSeqs.size(); i++) {
36                                         string leftFrag = templateSeqs[i]->getUnaligned();
37                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
38                                         
39                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
40                                         databaseLeft->addSequence(leftTemp);    
41                                 }
42                                 databaseLeft->generateDB();
43                                 
44                         }else { 
45                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
46                         }
47                         kmerFileTestLeft.close();
48                         
49                         databaseLeft->setNumSeqs(templateSeqs.size());
50                         
51                         //rightside
52                         string rightTemplateFileName = "right." + templateFileName;
53                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);                    
54                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
55                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
56                         
57                         if(!kmerFileTestRight){ 
58                         
59                                 for (int i = 0; i < templateSeqs.size(); i++) {
60                                         string rightFrag = templateSeqs[i]->getUnaligned();
61                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
62                                         
63                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
64                                         databaseRight->addSequence(rightTemp);  
65                                 }
66                                 databaseRight->generateDB();
67                                 
68                         }else { 
69                                 databaseRight->readKmerDB(kmerFileTestRight);   
70                         }
71                         kmerFileTestRight.close();
72                         
73                         databaseRight->setNumSeqs(templateSeqs.size());
74
75                 }
76                 
77                 int start = time(NULL); 
78                 //filter the sequences
79                 //read in all query seqs
80                 ifstream in; 
81                 openInputFile(fastafile, in);
82                 
83                 vector<Sequence*> tempQuerySeqs;
84                 while(!in.eof()){
85                         Sequence* s = new Sequence(in);
86                         gobble(in);
87                         
88                         if (s->getName() != "") { tempQuerySeqs.push_back(s); }
89                 }
90                 in.close();
91                 
92                 vector<Sequence*> temp = templateSeqs;
93                 for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
94                                 
95                 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
96                                 
97                 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
98                 
99                 //run filter on template
100                 for (int i = 0; i < templateSeqs.size(); i++) {  runFilter(templateSeqs[i]);  }
101                 
102                 mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to filter.");  mothurOutEndLine();
103
104         }
105         catch(exception& e) {
106                 errorOut(e, "ChimeraSlayer", "doprep");
107                 exit(1);
108         }
109 }
110 //***************************************************************************************************************
111 ChimeraSlayer::~ChimeraSlayer() {       delete decalc;  if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }    }
112 //***************************************************************************************************************
113 void ChimeraSlayer::printHeader(ostream& out) {
114         mothurOutEndLine();
115         mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
116         mothurOutEndLine();
117         
118         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
119 }
120 //***************************************************************************************************************
121 void ChimeraSlayer::print(ostream& out) {
122         try {
123                 if (chimeraFlags == "yes") {
124                         string chimeraFlag = "no";
125                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
126                            ||
127                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
128                         
129                         
130                         if (chimeraFlag == "yes") {     
131                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
132                                         mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
133                                 }
134                         }
135                         
136                         printBlock(chimeraResults[0], out);
137                         out << endl;
138                 }else {  out << querySeq->getName() << "\tno" << endl;  }
139                 
140         }
141         catch(exception& e) {
142                 errorOut(e, "ChimeraSlayer", "print");
143                 exit(1);
144         }
145 }
146 //***************************************************************************************************************
147 int ChimeraSlayer::getChimeras(Sequence* query) {
148         try {
149                 chimeraFlags = "no";
150                 
151                 //filter query
152                 spotMap = runFilter(query);
153                 
154                 querySeq = query;
155                 
156                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
157                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
158                 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
159                 
160                 string chimeraFlag = maligner->getResults(query, decalc);
161                 vector<results> Results = maligner->getOutput();
162                                 
163                 //found in testing realigning only made things worse
164                 if (realign) {
165                         ChimeraReAligner realigner(templateSeqs, match, misMatch);
166                         realigner.reAlign(query, Results);
167                 }
168
169                 if (chimeraFlag == "yes") {
170                         
171                         //get sequence that were given from maligner results
172                         vector<SeqDist> seqs;
173                         map<string, float> removeDups;
174                         map<string, float>::iterator itDup;
175                         map<string, string> parentNameSeq;
176                         map<string, string>::iterator itSeq;
177                         for (int j = 0; j < Results.size(); j++) {
178                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
179                                 //only add if you are not a duplicate
180                                 itDup = removeDups.find(Results[j].parent);
181                                 if (itDup == removeDups.end()) { //this is not duplicate
182                                         removeDups[Results[j].parent] = dist;
183                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
184                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
185                                         removeDups[Results[j].parent] = dist;
186                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
187                                 }
188                         }
189                         
190                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
191                                 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
192                                 itSeq = parentNameSeq.find(itDup->first);
193 //cout << itDup->first << itSeq->second << endl;
194                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
195                                 
196                                 SeqDist member;
197                                 member.seq = seq;
198                                 member.dist = itDup->second;
199                                 
200                                 seqs.push_back(member);
201                         }
202                         
203                         //limit number of parents to explore - default 3
204                         if (Results.size() > parents) {
205                                 //sort by distance
206                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
207                                 //prioritize larger more similiar sequence fragments
208                                 reverse(seqs.begin(), seqs.end());
209                                 
210                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
211                                         delete seqs[k].seq;
212                                         seqs.pop_back();        
213                                 }
214                         }
215                         
216                         //put seqs into vector to send to slayer
217                         vector<Sequence*> seqsForSlayer;
218                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
219                         
220                         //mask then send to slayer...
221                         if (seqMask != "") {
222                                 decalc->setMask(seqMask);
223                                 
224                                 //mask querys
225                                 decalc->runMask(query);
226                                 
227                                 //mask parents
228                                 for (int k = 0; k < seqsForSlayer.size(); k++) {
229                                         decalc->runMask(seqsForSlayer[k]);
230                                 }
231                                 
232                                 spotMap = decalc->getMaskMap();
233                         }
234                         
235                         //send to slayer
236                         chimeraFlags = slayer->getResults(query, seqsForSlayer);
237                         chimeraResults = slayer->getOutput();
238                         
239                         //free memory
240                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
241                 }
242                 
243                 delete maligner;
244                 delete slayer;
245                 
246                 return 0;
247         }
248         catch(exception& e) {
249                 errorOut(e, "ChimeraSlayer", "getChimeras");
250                 exit(1);
251         }
252 }
253 //***************************************************************************************************************
254 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
255         try {
256         //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
257                 
258                 out << querySeq->getName() << '\t';
259                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
260                 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
261                 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
262                 
263                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
264                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
265                 
266                 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
267                 
268                 //out << "Similarity of parents: " << data.ab << endl;
269                 //out << "Similarity of query to parentA: " << data.qa << endl;
270                 //out << "Similarity of query to parentB: " << data.qb << endl;
271                 
272                 
273                 //out << "Per_id(QL,A): " << data.qla << endl;
274                 //out << "Per_id(QL,B): " << data.qlb << endl;
275                 //out << "Per_id(QR,A): " << data.qra << endl;
276                 //out << "Per_id(QR,B): " << data.qrb << endl;
277
278                 
279                 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
280                 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
281
282         }
283         catch(exception& e) {
284                 errorOut(e, "ChimeraSlayer", "printBlock");
285                 exit(1);
286         }
287 }
288 //***************************************************************************************************************
289