]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
added sorted parameter to get.oturep, added error checking to chimera classes in...
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
14 //***************************************************************************************************************
15
16 ChimeraSlayer::~ChimeraSlayer() {
17         try {
18                 for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
19                 for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
20         }
21         catch(exception& e) {
22                 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
28         try {
29                 mothurOutEndLine();
30                 mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
31                 mothurOutEndLine();
32                 
33                 for (int i = 0; i < querySeqs.size(); i++) {
34                 
35                         if (chimeraFlags[i] == "yes") { 
36                                 if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
37                                         mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
38                                         out << querySeqs[i]->getName() << "\tyes" << endl;
39                                 }else {
40                                         out << querySeqs[i]->getName() << "\tno" << endl;
41                                         //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
42                                 }
43
44                                 printBlock(chimeraResults[i][0], out, i);
45                                 
46                                 out << endl;
47                         }else{
48                                 out << querySeqs[i]->getName() << "\tno" << endl;
49                                 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
50                         }
51                 }
52                                 
53         }
54         catch(exception& e) {
55                 errorOut(e, "ChimeraSlayer", "print");
56                 exit(1);
57         }
58 }
59
60 //***************************************************************************************************************
61 int ChimeraSlayer::getChimeras() {
62         try {
63                 
64                 //read in query sequences and subject sequences
65                 mothurOut("Reading sequences and template file... "); cout.flush();
66                 querySeqs = readSeqs(fastafile);
67                 templateSeqs = readSeqs(templateFile);
68                 mothurOut("Done."); mothurOutEndLine();
69                 
70                 int numSeqs = querySeqs.size();
71                 
72                 if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1;  }
73                 
74                 chimeraResults.resize(numSeqs);
75                 chimeraFlags.resize(numSeqs, "no");
76                 spotMap.resize(numSeqs);
77                 
78                 //break up file if needed
79                 int linesPerProcess = numSeqs / processors ;
80                 
81                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
82                         //find breakup of sequences for all times we will Parallelize
83                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
84                         else {
85                                 //fill line pairs
86                                 for (int i = 0; i < (processors-1); i++) {                      
87                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
88                                 }
89                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
90                                 int i = processors - 1;
91                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
92                         }
93                 #else
94                         lines.push_back(new linePair(0, numSeqs));
95                 #endif
96                 
97                 if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
98                 
99                 //initialize spotMap
100                 for (int j = 0; j < numSeqs; j++) {
101                         for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
102                                 spotMap[j][i] = i;
103                         }
104                 }
105                 
106                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
107                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
108                 slayer = new Slayer(window, increment, minSim, divR, iters);
109                 
110                 for (int i = 0; i < querySeqs.size(); i++) {
111                 
112                         string chimeraFlag = maligner->getResults(querySeqs[i]);
113                         //float percentIdentical = maligner->getPercentID();
114                         vector<results> Results = maligner->getOutput();
115                         
116                         cout << "Processing sequence: " << i+1 << endl;
117                         
118                         //for (int j = 0; j < Results.size(); j++) {
119                                 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
120                         //}
121                         
122                         //if (chimeraFlag == "yes") {
123                         
124                                 //get sequence that were given from maligner results
125                                 vector<SeqDist> seqs;
126                                 map<string, float> removeDups;
127                                 map<string, float>::iterator itDup;
128                                 for (int j = 0; j < Results.size(); j++) {
129                                         float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
130                                         //only add if you are not a duplicate
131                                         itDup = removeDups.find(Results[j].parent);
132                                         if (itDup == removeDups.end()) { //this is not duplicate
133                                                 removeDups[Results[j].parent] = dist;
134                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
135                                                 removeDups[Results[j].parent] = dist;
136                                         }
137                                 }
138                                 
139                                 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
140                                         Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
141                                         
142                                         SeqDist member;
143                                         member.seq = seq;
144                                         member.dist = itDup->second;
145                                         
146                                         seqs.push_back(member);
147                                 }
148                                 
149                                 //limit number of parents to explore - default 5
150                                 if (Results.size() > parents) {
151                                         //sort by distance
152                                         sort(seqs.begin(), seqs.end(), compareSeqDist);
153                                         //prioritize larger more similiar sequence fragments
154                                         reverse(seqs.begin(), seqs.end());
155                                         
156                                         for (int k = seqs.size()-1; k > (parents-1); k--)  {  
157                                                 delete seqs[k].seq;
158                                                 seqs.pop_back();        
159                                         }
160                                 }
161                 
162                                 //put seqs into vector to send to slayer
163                                 vector<Sequence*> seqsForSlayer;
164                                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
165 //ofstream out;
166 //string name = querySeqs[i]->getName();
167 //cout << name << endl;
168 //name = name.substr(name.find_first_of("|")+1);
169 //cout << name << endl;
170 //name = name.substr(name.find_first_of("|")+1);
171 //cout << name << endl;
172 //name = name.substr(0, name.find_last_of("|"));
173 //cout << name << endl;
174 //string filename = name + ".seqsforslayer";
175 //openOutputFile(filename, out);        
176 //for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);        }
177 //out.close();
178 //filename = name + ".fasta";
179 //openOutputFile(filename, out);        
180 //q->printSequence(out);
181 //out.close();
182
183                         
184                                 //mask then send to slayer...
185                                 if (seqMask != "") {
186                                         decalc->setMask(seqMask);
187
188                                         //mask querys
189                                         decalc->runMask(querySeqs[i]);
190                                         
191                                         //mask parents
192                                         for (int k = 0; k < seqsForSlayer.size(); k++) {
193                                                 decalc->runMask(seqsForSlayer[k]);
194                                         }
195                                         
196                                         for (int i = 0; i < numSeqs; i++) {
197                                                 spotMap[i] = decalc->getMaskMap();
198                                         }
199
200                                 }
201                                 
202                                 //send to slayer
203                                 chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
204                                 chimeraResults[i] = slayer->getOutput();
205                         
206                                 //free memory
207                                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
208                         //}
209                         
210                 }       
211                 //free memory
212                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
213                 
214                 if (seqMask != "") {
215                         delete decalc; 
216                 }
217                 
218                 return 0;
219         }
220         catch(exception& e) {
221                 errorOut(e, "ChimeraSlayer", "getChimeras");
222                 exit(1);
223         }
224 }
225 //***************************************************************************************************************
226 Sequence* ChimeraSlayer::getSequence(string name) {
227         try{
228                 Sequence* temp;
229                 
230                 //look through templateSeqs til you find it
231                 int spot = -1;
232                 for (int i = 0; i < templateSeqs.size(); i++) {
233                         if (name == templateSeqs[i]->getName()) {  
234                                 spot = i;
235                                 break;
236                         }
237                 }
238                 
239                 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
240                 
241                 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
242                 
243                 return temp;
244         }
245         catch(exception& e) {
246                 errorOut(e, "ChimeraSlayer", "getSequence");
247                 exit(1);
248         }
249 }
250 //***************************************************************************************************************
251 void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
252         try {
253                 
254                 out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
255                 out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
256                 out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
257                 
258                 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
259                 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
260                 
261                 out << "Similarity of parents: " << data.ab << endl;
262                 out << "Similarity of query to parentA: " << data.qa << endl;
263                 out << "Similarity of query to parentB: " << data.qb << endl;
264                 
265                 //out << "Per_id(QL,A): " << data.qla << endl;
266                 //out << "Per_id(QL,B): " << data.qlb << endl;
267                 //out << "Per_id(QR,A): " << data.qra << endl;
268                 //out << "Per_id(QR,B): " << data.qrb << endl;
269
270                 
271                 out << "DeltaL: " << (data.qla - data.qlb) << endl;
272                 out << "DeltaR: " << (data.qra - data.qrb) << endl;
273
274         }
275         catch(exception& e) {
276                 errorOut(e, "ChimeraSlayer", "printBlock");
277                 exit(1);
278         }
279 }
280 //***************************************************************************************************************
281