5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) { fastafile = filename; templateFile = temp; }
14 //***************************************************************************************************************
16 ChimeraSlayer::~ChimeraSlayer() {
18 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
19 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
22 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
30 mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
33 for (int i = 0; i < querySeqs.size(); i++) {
35 if (chimeraFlags[i] == "yes") {
37 if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
38 mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
39 out << querySeqs[i]->getName() << "\tyes" << endl;
41 out << querySeqs[i]->getName() << "\tyes" << endl;
42 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
45 printBlock(chimeraResults[i][0], out, i);
49 out << querySeqs[i]->getName() << "\tno" << endl;
50 //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
56 errorOut(e, "ChimeraSlayer", "print");
61 //***************************************************************************************************************
62 int ChimeraSlayer::getChimeras() {
65 //read in query sequences and subject sequences
66 mothurOut("Reading sequences and template file... "); cout.flush();
67 querySeqs = readSeqs(fastafile);
68 templateSeqs = readSeqs(templateFile);
69 mothurOut("Done."); mothurOutEndLine();
71 int numSeqs = querySeqs.size();
73 if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1; }
75 chimeraResults.resize(numSeqs);
76 chimeraFlags.resize(numSeqs, "no");
77 spotMap.resize(numSeqs);
79 //break up file if needed
80 int linesPerProcess = numSeqs / processors ;
82 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
83 //find breakup of sequences for all times we will Parallelize
84 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
87 for (int i = 0; i < (processors-1); i++) {
88 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
90 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
91 int i = processors - 1;
92 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
95 lines.push_back(new linePair(0, numSeqs));
98 if (seqMask != "") { decalc = new DeCalculator(); } //to use below
101 for (int j = 0; j < numSeqs; j++) {
102 for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
107 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
108 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
109 slayer = new Slayer(window, increment, minSim, divR, iters);
111 for (int i = 0; i < querySeqs.size(); i++) {
113 string chimeraFlag = maligner->getResults(querySeqs[i]);
114 //float percentIdentical = maligner->getPercentID();
115 vector<results> Results = maligner->getOutput();
117 cout << "Processing sequence: " << i+1 << endl;
119 //for (int j = 0; j < Results.size(); j++) {
120 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
123 //if (chimeraFlag == "yes") {
125 //get sequence that were given from maligner results
126 vector<SeqDist> seqs;
127 map<string, float> removeDups;
128 map<string, float>::iterator itDup;
129 for (int j = 0; j < Results.size(); j++) {
130 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
131 //only add if you are not a duplicate
132 itDup = removeDups.find(Results[j].parent);
133 if (itDup == removeDups.end()) { //this is not duplicate
134 removeDups[Results[j].parent] = dist;
135 }else if (dist > itDup->second) { //is this a stronger number for this parent
136 removeDups[Results[j].parent] = dist;
140 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
141 Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
145 member.dist = itDup->second;
147 seqs.push_back(member);
150 //limit number of parents to explore - default 5
151 if (Results.size() > parents) {
153 sort(seqs.begin(), seqs.end(), compareSeqDist);
154 //prioritize larger more similiar sequence fragments
155 reverse(seqs.begin(), seqs.end());
157 for (int k = seqs.size()-1; k > (parents-1); k--) {
163 //put seqs into vector to send to slayer
164 vector<Sequence*> seqsForSlayer;
165 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
167 //string name = querySeqs[i]->getName();
168 //cout << name << endl;
169 //name = name.substr(name.find_first_of("|")+1);
170 //cout << name << endl;
171 //name = name.substr(name.find_first_of("|")+1);
172 //cout << name << endl;
173 //name = name.substr(0, name.find_last_of("|"));
174 //cout << name << endl;
175 //string filename = name + ".seqsforslayer";
176 //openOutputFile(filename, out);
177 //for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out); }
179 //filename = name + ".fasta";
180 //openOutputFile(filename, out);
181 //q->printSequence(out);
185 //mask then send to slayer...
187 decalc->setMask(seqMask);
190 decalc->runMask(querySeqs[i]);
193 for (int k = 0; k < seqsForSlayer.size(); k++) {
194 decalc->runMask(seqsForSlayer[k]);
197 for (int i = 0; i < numSeqs; i++) {
198 spotMap[i] = decalc->getMaskMap();
204 chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
205 chimeraResults[i] = slayer->getOutput();
208 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
213 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
221 catch(exception& e) {
222 errorOut(e, "ChimeraSlayer", "getChimeras");
226 //***************************************************************************************************************
227 Sequence* ChimeraSlayer::getSequence(string name) {
231 //look through templateSeqs til you find it
233 for (int i = 0; i < templateSeqs.size(); i++) {
234 if (name == templateSeqs[i]->getName()) {
240 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
242 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
246 catch(exception& e) {
247 errorOut(e, "ChimeraSlayer", "getSequence");
251 //***************************************************************************************************************
252 void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
255 out << "parentA: " << data.parentA.getName() << " parentB: " << data.parentB.getName() << endl;
256 out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
257 out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
259 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
260 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
262 out << "Similarity of parents: " << data.ab << endl;
263 out << "Similarity of query to parentA: " << data.qa << endl;
264 out << "Similarity of query to parentB: " << data.qb << endl;
266 //out << "Per_id(QL,A): " << data.qla << endl;
267 //out << "Per_id(QL,B): " << data.qlb << endl;
268 //out << "Per_id(QR,A): " << data.qra << endl;
269 //out << "Per_id(QR,B): " << data.qrb << endl;
272 out << "DeltaL: " << (data.qla - data.qlb) << endl;
273 out << "DeltaR: " << (data.qra - data.qrb) << endl;
276 catch(exception& e) {
277 errorOut(e, "ChimeraSlayer", "printBlock");
281 //***************************************************************************************************************