5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) { fastafile = filename; templateFile = temp; }
14 //***************************************************************************************************************
16 ChimeraSlayer::~ChimeraSlayer() {
18 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
19 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
22 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
30 mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
33 for (int i = 0; i < querySeqs.size(); i++) {
35 if (chimeraFlags[i] == "yes") {
36 if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
37 mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
38 out << querySeqs[i]->getName() << "\tyes" << endl;
40 out << querySeqs[i]->getName() << "\tno" << endl;
41 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
44 printBlock(chimeraResults[i][0], out, i);
48 out << querySeqs[i]->getName() << "\tno" << endl;
49 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
55 errorOut(e, "ChimeraSlayer", "print");
60 //***************************************************************************************************************
61 void ChimeraSlayer::getChimeras() {
64 //read in query sequences and subject sequences
65 mothurOut("Reading sequences and template file... "); cout.flush();
66 querySeqs = readSeqs(fastafile);
67 templateSeqs = readSeqs(templateFile);
68 mothurOut("Done."); mothurOutEndLine();
70 int numSeqs = querySeqs.size();
72 chimeraResults.resize(numSeqs);
73 chimeraFlags.resize(numSeqs, "no");
74 spotMap.resize(numSeqs);
76 //break up file if needed
77 int linesPerProcess = numSeqs / processors ;
79 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
80 //find breakup of sequences for all times we will Parallelize
81 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
84 for (int i = 0; i < (processors-1); i++) {
85 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
87 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
88 int i = processors - 1;
89 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
92 lines.push_back(new linePair(0, numSeqs));
95 if (seqMask != "") { decalc = new DeCalculator(); } //to use below
98 for (int j = 0; j < numSeqs; j++) {
99 for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
104 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
105 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
106 slayer = new Slayer(window, increment, minSim, divR, iters);
108 for (int i = 0; i < querySeqs.size(); i++) {
110 string chimeraFlag = maligner->getResults(querySeqs[i]);
111 //float percentIdentical = maligner->getPercentID();
112 vector<results> Results = maligner->getOutput();
114 cout << "Processing sequence: " << i+1 << endl;
116 //for (int j = 0; j < Results.size(); j++) {
117 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
120 //if (chimeraFlag == "yes") {
122 //get sequence that were given from maligner results
123 vector<SeqDist> seqs;
124 map<string, float> removeDups;
125 map<string, float>::iterator itDup;
126 for (int j = 0; j < Results.size(); j++) {
127 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
128 //only add if you are not a duplicate
129 itDup = removeDups.find(Results[j].parent);
130 if (itDup == removeDups.end()) { //this is not duplicate
131 removeDups[Results[j].parent] = dist;
132 }else if (dist > itDup->second) { //is this a stronger number for this parent
133 removeDups[Results[j].parent] = dist;
137 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
138 Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
142 member.dist = itDup->second;
144 seqs.push_back(member);
147 //limit number of parents to explore - default 5
148 if (Results.size() > parents) {
150 sort(seqs.begin(), seqs.end(), compareSeqDist);
151 //prioritize larger more similiar sequence fragments
152 reverse(seqs.begin(), seqs.end());
154 for (int k = seqs.size()-1; k > (parents-1); k--) {
160 //put seqs into vector to send to slayer
161 vector<Sequence*> seqsForSlayer;
162 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
164 //string name = querySeqs[i]->getName();
165 //cout << name << endl;
166 //name = name.substr(name.find_first_of("|")+1);
167 //cout << name << endl;
168 //name = name.substr(name.find_first_of("|")+1);
169 //cout << name << endl;
170 //name = name.substr(0, name.find_last_of("|"));
171 //cout << name << endl;
172 //string filename = name + ".seqsforslayer";
173 //openOutputFile(filename, out);
174 //for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out); }
176 //filename = name + ".fasta";
177 //openOutputFile(filename, out);
178 //q->printSequence(out);
182 //mask then send to slayer...
184 decalc->setMask(seqMask);
187 decalc->runMask(querySeqs[i]);
190 for (int k = 0; k < seqsForSlayer.size(); k++) {
191 decalc->runMask(seqsForSlayer[k]);
194 for (int i = 0; i < numSeqs; i++) {
195 spotMap[i] = decalc->getMaskMap();
201 chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
202 chimeraResults[i] = slayer->getOutput();
205 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
210 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
216 catch(exception& e) {
217 errorOut(e, "ChimeraSlayer", "getChimeras");
221 //***************************************************************************************************************
222 Sequence* ChimeraSlayer::getSequence(string name) {
226 //look through templateSeqs til you find it
228 for (int i = 0; i < templateSeqs.size(); i++) {
229 if (name == templateSeqs[i]->getName()) {
235 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
237 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
241 catch(exception& e) {
242 errorOut(e, "ChimeraSlayer", "getSequence");
246 //***************************************************************************************************************
247 void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
250 out << "parentA: " << data.parentA.getName() << " parentB: " << data.parentB.getName() << endl;
251 out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
252 out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
254 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
255 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
257 out << "Similarity of parents: " << data.ab << endl;
258 out << "Similarity of query to parentA: " << data.qa << endl;
259 out << "Similarity of query to parentB: " << data.qb << endl;
261 //out << "Per_id(QL,A): " << data.qla << endl;
262 //out << "Per_id(QL,B): " << data.qlb << endl;
263 //out << "Per_id(QR,A): " << data.qra << endl;
264 //out << "Per_id(QR,B): " << data.qrb << endl;
267 out << "DeltaL: " << (data.qla - data.qlb) << endl;
268 out << "DeltaR: " << (data.qra - data.qrb) << endl;
271 catch(exception& e) {
272 errorOut(e, "ChimeraSlayer", "printBlock");
276 //***************************************************************************************************************