]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
added checks for ^C to quit command instead of program
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13
14 //***************************************************************************************************************
15 ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {    
16         decalc = new DeCalculator();    
17 }
18 //***************************************************************************************************************
19 int ChimeraSlayer::doPrep() {
20         try {
21         
22                 string  kmerDBNameLeft;
23                 string  kmerDBNameRight;
24                 
25                 //generate the kmerdb to pass to maligner
26                 if (searchMethod == "kmer") { 
27                         //leftside
28                         string leftTemplateFileName = "left." + templateFileName;
29                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);                      
30                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
31                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
32                         
33                         if(!kmerFileTestLeft){  
34                         
35                                 for (int i = 0; i < templateSeqs.size(); i++) {
36                                         
37                                         if (m->control_pressed) { return 0; } 
38                                         
39                                         string leftFrag = templateSeqs[i]->getUnaligned();
40                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
41                                         
42                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
43                                         databaseLeft->addSequence(leftTemp);    
44                                 }
45                                 databaseLeft->generateDB();
46                                 
47                         }else { 
48                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
49                         }
50                         kmerFileTestLeft.close();
51                         
52                         databaseLeft->setNumSeqs(templateSeqs.size());
53                         
54                         //rightside
55                         string rightTemplateFileName = "right." + templateFileName;
56                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);                    
57                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
58                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
59                         
60                         if(!kmerFileTestRight){ 
61                         
62                                 for (int i = 0; i < templateSeqs.size(); i++) {
63                                         if (m->control_pressed) { return 0; } 
64                                         
65                                         string rightFrag = templateSeqs[i]->getUnaligned();
66                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
67                                         
68                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
69                                         databaseRight->addSequence(rightTemp);  
70                                 }
71                                 databaseRight->generateDB();
72                                 
73                         }else { 
74                                 databaseRight->readKmerDB(kmerFileTestRight);   
75                         }
76                         kmerFileTestRight.close();
77                         
78                         databaseRight->setNumSeqs(templateSeqs.size());
79
80                 }
81                 
82                 int start = time(NULL); 
83                 //filter the sequences
84                 //read in all query seqs
85                 ifstream in; 
86                 openInputFile(fastafile, in);
87                 
88                 vector<Sequence*> tempQuerySeqs;
89                 while(!in.eof()){
90                         if (m->control_pressed) { for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  } return 0; } 
91                 
92                         Sequence* s = new Sequence(in);
93                         gobble(in);
94                         
95                         if (s->getName() != "") { tempQuerySeqs.push_back(s); }
96                 }
97                 in.close();
98                 
99                 vector<Sequence*> temp = templateSeqs;
100                 for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
101                                 
102                 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
103                                 
104                 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
105                 
106                 if (m->control_pressed) {  return 0; } 
107
108                 
109                 //run filter on template
110                 for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  return 0; }  runFilter(templateSeqs[i]);  }
111                 
112                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to filter.");    m->mothurOutEndLine();
113                 
114                 return 0;
115
116         }
117         catch(exception& e) {
118                 m->errorOut(e, "ChimeraSlayer", "doprep");
119                 exit(1);
120         }
121 }
122 //***************************************************************************************************************
123 ChimeraSlayer::~ChimeraSlayer() {       delete decalc;  if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }    }
124 //***************************************************************************************************************
125 void ChimeraSlayer::printHeader(ostream& out) {
126         m->mothurOutEndLine();
127         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
128         m->mothurOutEndLine();
129         
130         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
131 }
132 //***************************************************************************************************************
133 int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
134         try {
135                 if (chimeraFlags == "yes") {
136                         string chimeraFlag = "no";
137                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
138                            ||
139                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
140                         
141                         
142                         if (chimeraFlag == "yes") {     
143                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
144                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
145                                         outAcc << querySeq->getName() << endl;
146                                 }
147                         }
148                         
149                         printBlock(chimeraResults[0], out);
150                         out << endl;
151                 }else {  out << querySeq->getName() << "\tno" << endl;  }
152                 
153                 return 0;
154                 
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "ChimeraSlayer", "print");
158                 exit(1);
159         }
160 }
161 //***************************************************************************************************************
162 int ChimeraSlayer::getChimeras(Sequence* query) {
163         try {
164                 chimeraFlags = "no";
165                 
166                 //filter query
167                 spotMap = runFilter(query);
168                 
169                 querySeq = query;
170                 
171                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
172                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
173                 slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
174                 
175                 if (m->control_pressed) {  return 0;  }
176                 
177                 string chimeraFlag = maligner->getResults(query, decalc);
178                 if (m->control_pressed) {  return 0;  }
179                 vector<results> Results = maligner->getOutput();
180                                 
181                 //found in testing realigning only made things worse
182                 if (realign) {
183                         ChimeraReAligner realigner(templateSeqs, match, misMatch);
184                         realigner.reAlign(query, Results);
185                 }
186
187                 if (chimeraFlag == "yes") {
188                         
189                         //get sequence that were given from maligner results
190                         vector<SeqDist> seqs;
191                         map<string, float> removeDups;
192                         map<string, float>::iterator itDup;
193                         map<string, string> parentNameSeq;
194                         map<string, string>::iterator itSeq;
195                         for (int j = 0; j < Results.size(); j++) {
196                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
197                                 //only add if you are not a duplicate
198                                 itDup = removeDups.find(Results[j].parent);
199                                 if (itDup == removeDups.end()) { //this is not duplicate
200                                         removeDups[Results[j].parent] = dist;
201                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
202                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
203                                         removeDups[Results[j].parent] = dist;
204                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
205                                 }
206                         }
207                         
208                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
209                                 //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
210                                 itSeq = parentNameSeq.find(itDup->first);
211 //cout << itDup->first << itSeq->second << endl;
212                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
213                                 
214                                 SeqDist member;
215                                 member.seq = seq;
216                                 member.dist = itDup->second;
217                                 
218                                 seqs.push_back(member);
219                         }
220                         
221                         //limit number of parents to explore - default 3
222                         if (Results.size() > parents) {
223                                 //sort by distance
224                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
225                                 //prioritize larger more similiar sequence fragments
226                                 reverse(seqs.begin(), seqs.end());
227                                 
228                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
229                                         delete seqs[k].seq;
230                                         seqs.pop_back();        
231                                 }
232                         }
233                         
234                         //put seqs into vector to send to slayer
235                         vector<Sequence*> seqsForSlayer;
236                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
237                         
238                         //mask then send to slayer...
239                         if (seqMask != "") {
240                                 decalc->setMask(seqMask);
241                                 
242                                 //mask querys
243                                 decalc->runMask(query);
244                                 
245                                 //mask parents
246                                 for (int k = 0; k < seqsForSlayer.size(); k++) {
247                                         decalc->runMask(seqsForSlayer[k]);
248                                 }
249                                 
250                                 spotMap = decalc->getMaskMap();
251                         }
252                         
253                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
254                         
255                         //send to slayer
256                         chimeraFlags = slayer->getResults(query, seqsForSlayer);
257                         if (m->control_pressed) {  return 0;  }
258                         chimeraResults = slayer->getOutput();
259                         
260                         //free memory
261                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
262                 }
263                 
264                 delete maligner;
265                 delete slayer;
266                 
267                 return 0;
268         }
269         catch(exception& e) {
270                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
271                 exit(1);
272         }
273 }
274 //***************************************************************************************************************
275 void ChimeraSlayer::printBlock(data_struct data, ostream& out){
276         try {
277         //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
278                 
279                 out << querySeq->getName() << '\t';
280                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
281                 //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
282                 //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
283                 
284                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
285                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
286                 
287                 out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
288                 
289                 //out << "Similarity of parents: " << data.ab << endl;
290                 //out << "Similarity of query to parentA: " << data.qa << endl;
291                 //out << "Similarity of query to parentB: " << data.qb << endl;
292                 
293                 
294                 //out << "Per_id(QL,A): " << data.qla << endl;
295                 //out << "Per_id(QL,B): " << data.qlb << endl;
296                 //out << "Per_id(QR,A): " << data.qra << endl;
297                 //out << "Per_id(QR,B): " << data.qrb << endl;
298
299                 
300                 //out << "DeltaL: " << (data.qla - data.qlb) << endl;
301                 //out << "DeltaR: " << (data.qra - data.qrb) << endl;
302
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "ChimeraSlayer", "printBlock");
306                 exit(1);
307         }
308 }
309 //***************************************************************************************************************
310