]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
a few changes to chimera classes
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
14 //***************************************************************************************************************
15
16 ChimeraSlayer::~ChimeraSlayer() {
17         try {
18                 for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
19                 for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
20         }
21         catch(exception& e) {
22                 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
28         try {
29                 mothurOutEndLine();
30                 mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
31                 mothurOutEndLine();
32                 
33                 for (int i = 0; i < querySeqs.size(); i++) {
34                 
35                         if (chimeraFlags[i] == "yes") { 
36                                 if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
37                                         mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
38                                         out << querySeqs[i]->getName() << "\tyes" << endl;
39                                 }else {
40                                         out << querySeqs[i]->getName() << "\tno" << endl;
41                                         //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
42                                 }
43
44                                 printBlock(chimeraResults[i][0], out, i);
45                                 
46                                 out << endl;
47                         }else{
48                                 out << querySeqs[i]->getName() << "\tno" << endl;
49                                 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
50                         }
51                 }
52                                 
53         }
54         catch(exception& e) {
55                 errorOut(e, "ChimeraSlayer", "print");
56                 exit(1);
57         }
58 }
59
60 //***************************************************************************************************************
61 void ChimeraSlayer::getChimeras() {
62         try {
63                 
64                 //read in query sequences and subject sequences
65                 mothurOut("Reading sequences and template file... "); cout.flush();
66                 querySeqs = readSeqs(fastafile);
67                 templateSeqs = readSeqs(templateFile);
68                 mothurOut("Done."); mothurOutEndLine();
69                 
70                 int numSeqs = querySeqs.size();
71                 
72                 chimeraResults.resize(numSeqs);
73                 chimeraFlags.resize(numSeqs, "no");
74                 spotMap.resize(numSeqs);
75                 
76                 //break up file if needed
77                 int linesPerProcess = numSeqs / processors ;
78                 
79                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
80                         //find breakup of sequences for all times we will Parallelize
81                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
82                         else {
83                                 //fill line pairs
84                                 for (int i = 0; i < (processors-1); i++) {                      
85                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
86                                 }
87                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
88                                 int i = processors - 1;
89                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
90                         }
91                 #else
92                         lines.push_back(new linePair(0, numSeqs));
93                 #endif
94                 
95                 if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
96                 
97                 //initialize spotMap
98                 for (int j = 0; j < numSeqs; j++) {
99                         for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
100                                 spotMap[j][i] = i;
101                         }
102                 }
103                 
104                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
105                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
106                 slayer = new Slayer(window, increment, minSim, divR, iters);
107                 
108                 for (int i = 0; i < querySeqs.size(); i++) {
109                 
110                         string chimeraFlag = maligner->getResults(querySeqs[i]);
111                         //float percentIdentical = maligner->getPercentID();
112                         vector<results> Results = maligner->getOutput();
113                         
114                         cout << "Processing sequence: " << i+1 << endl;
115                         
116                         //for (int j = 0; j < Results.size(); j++) {
117                                 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
118                         //}
119                         
120                         //if (chimeraFlag == "yes") {
121                         
122                                 //get sequence that were given from maligner results
123                                 vector<SeqDist> seqs;
124                                 map<string, float> removeDups;
125                                 map<string, float>::iterator itDup;
126                                 for (int j = 0; j < Results.size(); j++) {
127                                         float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
128                                         //only add if you are not a duplicate
129                                         itDup = removeDups.find(Results[j].parent);
130                                         if (itDup == removeDups.end()) { //this is not duplicate
131                                                 removeDups[Results[j].parent] = dist;
132                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
133                                                 removeDups[Results[j].parent] = dist;
134                                         }
135                                 }
136                                 
137                                 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
138                                         Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
139                                         
140                                         SeqDist member;
141                                         member.seq = seq;
142                                         member.dist = itDup->second;
143                                         
144                                         seqs.push_back(member);
145                                 }
146                                 
147                                 //limit number of parents to explore - default 5
148                                 if (Results.size() > parents) {
149                                         //sort by distance
150                                         sort(seqs.begin(), seqs.end(), compareSeqDist);
151                                         //prioritize larger more similiar sequence fragments
152                                         reverse(seqs.begin(), seqs.end());
153                                         
154                                         for (int k = seqs.size()-1; k > (parents-1); k--)  {  
155                                                 delete seqs[k].seq;
156                                                 seqs.pop_back();        
157                                         }
158                                 }
159                 
160                                 //put seqs into vector to send to slayer
161                                 vector<Sequence*> seqsForSlayer;
162                                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
163 //ofstream out;
164 //string name = querySeqs[i]->getName();
165 //cout << name << endl;
166 //name = name.substr(name.find_first_of("|")+1);
167 //cout << name << endl;
168 //name = name.substr(name.find_first_of("|")+1);
169 //cout << name << endl;
170 //name = name.substr(0, name.find_last_of("|"));
171 //cout << name << endl;
172 //string filename = name + ".seqsforslayer";
173 //openOutputFile(filename, out);        
174 //for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);        }
175 //out.close();
176 //filename = name + ".fasta";
177 //openOutputFile(filename, out);        
178 //q->printSequence(out);
179 //out.close();
180
181                         
182                                 //mask then send to slayer...
183                                 if (seqMask != "") {
184                                         decalc->setMask(seqMask);
185
186                                         //mask querys
187                                         decalc->runMask(querySeqs[i]);
188                                         
189                                         //mask parents
190                                         for (int k = 0; k < seqsForSlayer.size(); k++) {
191                                                 decalc->runMask(seqsForSlayer[k]);
192                                         }
193                                         
194                                         for (int i = 0; i < numSeqs; i++) {
195                                                 spotMap[i] = decalc->getMaskMap();
196                                         }
197
198                                 }
199                                 
200                                 //send to slayer
201                                 chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
202                                 chimeraResults[i] = slayer->getOutput();
203                         
204                                 //free memory
205                                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
206                         //}
207                         
208                 }       
209                 //free memory
210                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
211                 
212                 if (seqMask != "") {
213                         delete decalc; 
214                 }
215         }
216         catch(exception& e) {
217                 errorOut(e, "ChimeraSlayer", "getChimeras");
218                 exit(1);
219         }
220 }
221 //***************************************************************************************************************
222 Sequence* ChimeraSlayer::getSequence(string name) {
223         try{
224                 Sequence* temp;
225                 
226                 //look through templateSeqs til you find it
227                 int spot = -1;
228                 for (int i = 0; i < templateSeqs.size(); i++) {
229                         if (name == templateSeqs[i]->getName()) {  
230                                 spot = i;
231                                 break;
232                         }
233                 }
234                 
235                 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
236                 
237                 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
238                 
239                 return temp;
240         }
241         catch(exception& e) {
242                 errorOut(e, "ChimeraSlayer", "getSequence");
243                 exit(1);
244         }
245 }
246 //***************************************************************************************************************
247 void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
248         try {
249                 
250                 out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
251                 out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
252                 out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
253                 
254                 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
255                 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
256                 
257                 out << "Similarity of parents: " << data.ab << endl;
258                 out << "Similarity of query to parentA: " << data.qa << endl;
259                 out << "Similarity of query to parentB: " << data.qb << endl;
260                 
261                 //out << "Per_id(QL,A): " << data.qla << endl;
262                 //out << "Per_id(QL,B): " << data.qlb << endl;
263                 //out << "Per_id(QR,A): " << data.qra << endl;
264                 //out << "Per_id(QR,B): " << data.qrb << endl;
265
266                 
267                 out << "DeltaL: " << (data.qla - data.qlb) << endl;
268                 out << "DeltaR: " << (data.qra - data.qrb) << endl;
269
270         }
271         catch(exception& e) {
272                 errorOut(e, "ChimeraSlayer", "printBlock");
273                 exit(1);
274         }
275 }
276 //***************************************************************************************************************
277