]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
5d3d6706ede49b38c5385d29c1d127b1488c4000
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
14 //***************************************************************************************************************
15
16 ChimeraSlayer::~ChimeraSlayer() {
17         try {
18                 for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
19                 for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
20         }
21         catch(exception& e) {
22                 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
28         try {
29                 mothurOutEndLine();
30                 mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
31                 mothurOutEndLine();
32                 
33                 for (int i = 0; i < querySeqs.size(); i++) {
34                 
35                         if (chimeraFlags[i] == "yes") { 
36 //                              cout << i << endl;
37                                 if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
38                                         mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
39                                         out << querySeqs[i]->getName() << "\tyes" << endl;
40                                 }else {
41                                         out << querySeqs[i]->getName() << "\tyes" << endl;
42                                         //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
43                                 }
44
45                                 printBlock(chimeraResults[i][0], out, i);
46                                 
47                                 out << endl;
48                         }else{
49                                 out << querySeqs[i]->getName() << "\tno" << endl;
50                                 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
51                         }
52                 }
53                                 
54         }
55         catch(exception& e) {
56                 errorOut(e, "ChimeraSlayer", "print");
57                 exit(1);
58         }
59 }
60
61 //***************************************************************************************************************
62 int ChimeraSlayer::getChimeras() {
63         try {
64                 
65                 //read in query sequences and subject sequences
66                 mothurOut("Reading sequences and template file... "); cout.flush();
67                 querySeqs = readSeqs(fastafile);
68                 templateSeqs = readSeqs(templateFile);
69                 mothurOut("Done."); mothurOutEndLine();
70                 
71                 int numSeqs = querySeqs.size();
72                 
73                 if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1;  }
74                 
75                 chimeraResults.resize(numSeqs);
76                 chimeraFlags.resize(numSeqs, "no");
77                 spotMap.resize(numSeqs);
78                 
79                 //break up file if needed
80                 int linesPerProcess = numSeqs / processors ;
81                 
82                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
83                         //find breakup of sequences for all times we will Parallelize
84                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
85                         else {
86                                 //fill line pairs
87                                 for (int i = 0; i < (processors-1); i++) {                      
88                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
89                                 }
90                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
91                                 int i = processors - 1;
92                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
93                         }
94                 #else
95                         lines.push_back(new linePair(0, numSeqs));
96                 #endif
97                 
98                 if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
99                 
100                 //initialize spotMap
101                 for (int j = 0; j < numSeqs; j++) {
102                         for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
103                                 spotMap[j][i] = i;
104                         }
105                 }
106                 
107                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
108                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
109                 slayer = new Slayer(window, increment, minSim, divR, iters);
110                 
111                 for (int i = 0; i < querySeqs.size(); i++) {
112                 
113                         string chimeraFlag = maligner->getResults(querySeqs[i]);
114                         //float percentIdentical = maligner->getPercentID();
115                         vector<results> Results = maligner->getOutput();
116                         
117                         cout << "Processing sequence: " << i+1 << endl;
118                         
119                         //for (int j = 0; j < Results.size(); j++) {
120                                 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
121                         //}
122                         
123                         //if (chimeraFlag == "yes") {
124                         
125                                 //get sequence that were given from maligner results
126                                 vector<SeqDist> seqs;
127                                 map<string, float> removeDups;
128                                 map<string, float>::iterator itDup;
129                                 for (int j = 0; j < Results.size(); j++) {
130                                         float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
131                                         //only add if you are not a duplicate
132                                         itDup = removeDups.find(Results[j].parent);
133                                         if (itDup == removeDups.end()) { //this is not duplicate
134                                                 removeDups[Results[j].parent] = dist;
135                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
136                                                 removeDups[Results[j].parent] = dist;
137                                         }
138                                 }
139                                 
140                                 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
141                                         Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
142                                         
143                                         SeqDist member;
144                                         member.seq = seq;
145                                         member.dist = itDup->second;
146                                         
147                                         seqs.push_back(member);
148                                 }
149                                 
150                                 //limit number of parents to explore - default 5
151                                 if (Results.size() > parents) {
152                                         //sort by distance
153                                         sort(seqs.begin(), seqs.end(), compareSeqDist);
154                                         //prioritize larger more similiar sequence fragments
155                                         reverse(seqs.begin(), seqs.end());
156                                         
157                                         for (int k = seqs.size()-1; k > (parents-1); k--)  {  
158                                                 delete seqs[k].seq;
159                                                 seqs.pop_back();        
160                                         }
161                                 }
162                 
163                                 //put seqs into vector to send to slayer
164                                 vector<Sequence*> seqsForSlayer;
165                                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
166 //ofstream out;
167 //string name = querySeqs[i]->getName();
168 //cout << name << endl;
169 //name = name.substr(name.find_first_of("|")+1);
170 //cout << name << endl;
171 //name = name.substr(name.find_first_of("|")+1);
172 //cout << name << endl;
173 //name = name.substr(0, name.find_last_of("|"));
174 //cout << name << endl;
175 //string filename = name + ".seqsforslayer";
176 //openOutputFile(filename, out);        
177 //for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);        }
178 //out.close();
179 //filename = name + ".fasta";
180 //openOutputFile(filename, out);        
181 //q->printSequence(out);
182 //out.close();
183
184                         
185                                 //mask then send to slayer...
186                                 if (seqMask != "") {
187                                         decalc->setMask(seqMask);
188
189                                         //mask querys
190                                         decalc->runMask(querySeqs[i]);
191                                         
192                                         //mask parents
193                                         for (int k = 0; k < seqsForSlayer.size(); k++) {
194                                                 decalc->runMask(seqsForSlayer[k]);
195                                         }
196                                         
197                                         for (int i = 0; i < numSeqs; i++) {
198                                                 spotMap[i] = decalc->getMaskMap();
199                                         }
200
201                                 }
202                                 
203                                 //send to slayer
204                                 chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
205                                 chimeraResults[i] = slayer->getOutput();
206                         
207                                 //free memory
208                                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
209                         //}
210                         
211                 }       
212                 //free memory
213                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
214                 
215                 if (seqMask != "") {
216                         delete decalc; 
217                 }
218                 
219                 return 0;
220         }
221         catch(exception& e) {
222                 errorOut(e, "ChimeraSlayer", "getChimeras");
223                 exit(1);
224         }
225 }
226 //***************************************************************************************************************
227 Sequence* ChimeraSlayer::getSequence(string name) {
228         try{
229                 Sequence* temp;
230                 
231                 //look through templateSeqs til you find it
232                 int spot = -1;
233                 for (int i = 0; i < templateSeqs.size(); i++) {
234                         if (name == templateSeqs[i]->getName()) {  
235                                 spot = i;
236                                 break;
237                         }
238                 }
239                 
240                 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
241                 
242                 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
243                 
244                 return temp;
245         }
246         catch(exception& e) {
247                 errorOut(e, "ChimeraSlayer", "getSequence");
248                 exit(1);
249         }
250 }
251 //***************************************************************************************************************
252 void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
253         try {
254                 
255                 out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
256                 out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
257                 out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
258                 
259                 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
260                 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
261                 
262                 out << "Similarity of parents: " << data.ab << endl;
263                 out << "Similarity of query to parentA: " << data.qa << endl;
264                 out << "Similarity of query to parentB: " << data.qb << endl;
265                 
266                 //out << "Per_id(QL,A): " << data.qla << endl;
267                 //out << "Per_id(QL,B): " << data.qlb << endl;
268                 //out << "Per_id(QR,A): " << data.qra << endl;
269                 //out << "Per_id(QR,B): " << data.qrb << endl;
270
271                 
272                 out << "DeltaL: " << (data.qla - data.qlb) << endl;
273                 out << "DeltaR: " << (data.qra - data.qrb) << endl;
274
275         }
276         catch(exception& e) {
277                 errorOut(e, "ChimeraSlayer", "printBlock");
278                 exit(1);
279         }
280 }
281 //***************************************************************************************************************
282