]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
added warning about merging with something above cutoff to cluster. working on chimeras
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {    
16         decalc = new DeCalculator();    
17 }
18 //***************************************************************************************************************
19 void ChimeraSlayer::doPrep() {
20         try {
21         
22                 string  kmerDBNameLeft;
23                 string  kmerDBNameRight;
24                 
25                 //generate the kmerdb to pass to maligner
26                 if (searchMethod == "kmer") { 
27                         //leftside
28                         string leftTemplateFileName = "left." + templateFileName;
29                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);                      
30                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
31                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
32                         
33                         if(!kmerFileTestLeft){  
34                         
35                                 for (int i = 0; i < templateSeqs.size(); i++) {
36                                         string leftFrag = templateSeqs[i]->getUnaligned();
37                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
38                                         
39                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
40                                         databaseLeft->addSequence(leftTemp);    
41                                 }
42                                 databaseLeft->generateDB();
43                                 
44                         }else { 
45                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
46                         }
47                         kmerFileTestLeft.close();
48                         
49                         databaseLeft->setNumSeqs(templateSeqs.size());
50                         
51                         //rightside
52                         string rightTemplateFileName = "right." + templateFileName;
53                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);                    
54                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
55                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
56                         
57                         if(!kmerFileTestRight){ 
58                         
59                                 for (int i = 0; i < templateSeqs.size(); i++) {
60                                         string rightFrag = templateSeqs[i]->getUnaligned();
61                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
62                                         
63                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
64                                         databaseRight->addSequence(rightTemp);  
65                                 }
66                                 databaseRight->generateDB();
67                                 
68                         }else { 
69                                 databaseRight->readKmerDB(kmerFileTestRight);   
70                         }
71                         kmerFileTestRight.close();
72                         
73                         databaseRight->setNumSeqs(templateSeqs.size());
74
75                 }
76                 
77                 int start = time(NULL); 
78                 //filter the sequences
79                 //read in all query seqs
80                 ifstream in; 
81                 openInputFile(fastafile, in);
82                 
83                 vector<Sequence*> tempQuerySeqs;
84                 while(!in.eof()){
85                         Sequence* s = new Sequence(in);
86                         gobble(in);
87                         
88                         if (s->getName() != "") { tempQuerySeqs.push_back(s); }
89                 }
90                 in.close();
91                 
92                 vector<Sequence*> temp = templateSeqs;
93                 for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
94                                 
95                 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
96                                 
97                 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
98                 
99                 //run filter on template
100                 for (int i = 0; i < templateSeqs.size(); i++) {  runFilter(templateSeqs[i]);  }
101                 
102                 mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to filter.");  mothurOutEndLine();
103
104         }
105         catch(exception& e) {
106                 errorOut(e, "ChimeraSlayer", "doprep");
107                 exit(1);
108         }
109 }
110 //***************************************************************************************************************
111 ChimeraSlayer::~ChimeraSlayer() {       delete decalc;  if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }    }
112 //***************************************************************************************************************
113 void ChimeraSlayer::printHeader(ostream& out) {
114         mothurOutEndLine();
115         mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
116         mothurOutEndLine();
117         
118         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
119 }
120 //***************************************************************************************************************
121 void ChimeraSlayer::print(ostream& out, ostream& outAcc) {
122         try {
123                 if (chimeraFlags == "yes") {
124                         string chimeraFlag = "no";
125                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
126                            ||
127                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
128                         
129                         
130                         if (chimeraFlag == "yes") {     
131                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
132                                         mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
133                                         outAcc << querySeq->getName() << endl;
134                                 }
135                         }
136                         
137                         printBlock(chimeraResults[0], out);
138                         out << endl;
139                 }else {  out << querySeq->getName() << "\tno" << endl;  }
140                 
141         }
142         catch(exception& e) {
143                 errorOut(e, "ChimeraSlayer", "print");
144                 exit(1);
145         }
146 }
147 //***************************************************************************************************************
148 int ChimeraSlayer::getChimeras(Sequence* query) {
149         try {
150                 chimeraFlags = "no";
151                 
152                 //filter query
153                 spotMap = runFilter(query);
154                 
155                 querySeq = query;
156                 
157                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
158                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
159                 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
160                 
161                 string chimeraFlag = maligner->getResults(query, decalc);
162                 vector<results> Results = maligner->getOutput();
163                                 
164                 //found in testing realigning only made things worse
165                 if (realign) {
166                         ChimeraReAligner realigner(templateSeqs, match, misMatch);
167                         realigner.reAlign(query, Results);
168                 }
169
170                 if (chimeraFlag == "yes") {
171                         
172                         //get sequence that were given from maligner results
173                         vector<SeqDist> seqs;
174                         map<string, float> removeDups;
175                         map<string, float>::iterator itDup;
176                         map<string, string> parentNameSeq;
177                         map<string, string>::iterator itSeq;
178                         for (int j = 0; j < Results.size(); j++) {
179                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
180                                 //only add if you are not a duplicate
181                                 itDup = removeDups.find(Results[j].parent);
182                                 if (itDup == removeDups.end()) { //this is not duplicate
183                                         removeDups[Results[j].parent] = dist;
184                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
185                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
186                                         removeDups[Results[j].parent] = dist;
187                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
188                                 }
189                         }
190                         
191                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
192                                 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
193                                 itSeq = parentNameSeq.find(itDup->first);
194 //cout << itDup->first << itSeq->second << endl;
195                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
196                                 
197                                 SeqDist member;
198                                 member.seq = seq;
199                                 member.dist = itDup->second;
200                                 
201                                 seqs.push_back(member);
202                         }
203                         
204                         //limit number of parents to explore - default 3
205                         if (Results.size() > parents) {
206                                 //sort by distance
207                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
208                                 //prioritize larger more similiar sequence fragments
209                                 reverse(seqs.begin(), seqs.end());
210                                 
211                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
212                                         delete seqs[k].seq;
213                                         seqs.pop_back();        
214                                 }
215                         }
216                         
217                         //put seqs into vector to send to slayer
218                         vector<Sequence*> seqsForSlayer;
219                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
220                         
221                         //mask then send to slayer...
222                         if (seqMask != "") {
223                                 decalc->setMask(seqMask);
224                                 
225                                 //mask querys
226                                 decalc->runMask(query);
227                                 
228                                 //mask parents
229                                 for (int k = 0; k < seqsForSlayer.size(); k++) {
230                                         decalc->runMask(seqsForSlayer[k]);
231                                 }
232                                 
233                                 spotMap = decalc->getMaskMap();
234                         }
235                         
236                         //send to slayer
237                         chimeraFlags = slayer->getResults(query, seqsForSlayer);
238                         chimeraResults = slayer->getOutput();
239                         
240                         //free memory
241                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
242                 }
243                 
244                 delete maligner;
245                 delete slayer;
246                 
247                 return 0;
248         }
249         catch(exception& e) {
250                 errorOut(e, "ChimeraSlayer", "getChimeras");
251                 exit(1);
252         }
253 }
254 //***************************************************************************************************************
255 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
256         try {
257         //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
258                 
259                 out << querySeq->getName() << '\t';
260                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
261                 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
262                 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
263                 
264                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
265                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
266                 
267                 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
268                 
269                 //out << "Similarity of parents: " << data.ab << endl;
270                 //out << "Similarity of query to parentA: " << data.qa << endl;
271                 //out << "Similarity of query to parentB: " << data.qb << endl;
272                 
273                 
274                 //out << "Per_id(QL,A): " << data.qla << endl;
275                 //out << "Per_id(QL,B): " << data.qlb << endl;
276                 //out << "Per_id(QR,A): " << data.qra << endl;
277                 //out << "Per_id(QR,B): " << data.qrb << endl;
278
279                 
280                 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
281                 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
282
283         }
284         catch(exception& e) {
285                 errorOut(e, "ChimeraSlayer", "printBlock");
286                 exit(1);
287         }
288 }
289 //***************************************************************************************************************
290