5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) { fastafile = filename; templateFile = temp; }
14 //***************************************************************************************************************
16 ChimeraSlayer::~ChimeraSlayer() {
18 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
19 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
22 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
31 for (int i = 0; i < querySeqs.size(); i++) {
33 if (chimeraFlags[i] == "yes") {
34 mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
37 out << querySeqs[i]->getName() << "\tno" << endl;
43 my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB};
44 my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA};
46 my $per_id_QLA = $data_struct->{per_id_QLA};
47 my $per_id_QRB = $data_struct->{per_id_QRB};
48 my $per_id_AB = $data_struct->{per_id_AB};
49 my $per_id_QA = $data_struct->{per_id_QA};
50 my $per_id_QB = $data_struct->{per_id_QB};
51 my $per_id_LAB = $data_struct->{per_id_LAB};
52 my $per_id_RAB = $data_struct->{per_id_RAB};
53 my $per_id_QRA = $data_struct->{per_id_QRA};
54 my $per_id_QLB = $data_struct->{per_id_QLB};
55 my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA};
56 my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB};
58 my $win_left_end5 = $data_struct->{win_left_end5};
59 my $win_left_end3 = $data_struct->{win_left_end3};
60 my $win_right_end5 = $data_struct->{win_right_end5};
61 my $win_right_end3 = $data_struct->{win_right_end3};
62 my $Q = $data_struct->{query_alignment};
63 my $A = $data_struct->{parent_A_alignment};
64 my $B = $data_struct->{parent_B_alignment};
65 my $BS_A = $data_struct->{BS_A};
66 my $BS_B = $data_struct->{BS_B};
68 my @Q_chars = @{$Q->{align}};
69 my @A_chars = @{$A->{align}};
70 my @B_chars = @{$B->{align}};
72 my $query_acc = $Q->{acc};
73 my $A_acc = $A->{acc};
74 my $B_acc = $B->{acc};
76 my $break_left = $Q->{seqPos}->[$win_left_end3];
77 my $break_right = $Q->{seqPos}->[$win_right_end5];
80 cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl
81 << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB)
82 << "\tBS_QLARB: " . sprintf("%.2f", $BS_A)
83 << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB)
84 << "\tBS_QRALB: " . sprintf("%.2f", $BS_B)
86 << "\tbreakpoint: $break_left-$break_right\n\n";
90 print " Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n";
91 print " Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n";
92 print "--------------------------------------------------- A: $A_acc\n"
93 . " " . sprintf("%.2f", $per_id_QLA) . " " . sprintf("%.2f", $per_id_QRA) . "\n"
94 . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
95 . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . " |\n"
96 . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . " |\n"
98 . " (L-AB: " . sprintf("%.2f", $per_id_LAB) . ") | (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n"
99 . " WinL:$win_left_end5-$win_left_end3 | WinR:$win_right_end5-$win_right_end3\n"
101 . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . " |\n"
102 . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . " |\n"
103 . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
104 . " " . sprintf("%.2f", $per_id_QLB) . " " . sprintf("%.2f", $per_id_QRB) . "\n"
105 . "---------------------------------------------------- B: $B_acc\n";
106 print " Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n";
108 my $deltaL = $per_id_QLA - $per_id_QLB;
109 my $deltaR = $per_id_QRA - $per_id_QRB;
111 print "DeltaL: " . sprintf("%.2f", $deltaL) . " DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n";
113 unless ($printAlignmentsFlag) { return; }
116 ## build the left windows:
117 my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3];
118 my @A_left_win = @A_chars[$win_left_end5..$win_left_end3];
119 my @B_left_win = @B_chars[$win_left_end5..$win_left_end3];
121 &print_alignment($A_acc, \@A_left_win,
122 $query_acc, \@Q_left_win,
123 $B_acc, \@B_left_win);
125 print "\t\t** Breakpoint **\n\n";
127 my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3];
128 my @A_right_win = @A_chars[$win_right_end5..$win_right_end3];
129 my @B_right_win = @B_chars[$win_right_end5..$win_right_end3];
131 &print_alignment($A_acc, \@A_right_win,
132 $query_acc, \@Q_right_win,
133 $B_acc, \@B_right_win);
144 catch(exception& e) {
145 errorOut(e, "ChimeraSlayer", "print");
150 //***************************************************************************************************************
151 void ChimeraSlayer::getChimeras() {
154 //read in query sequences and subject sequences
155 mothurOut("Reading sequences and template file... "); cout.flush();
156 querySeqs = readSeqs(fastafile);
157 templateSeqs = readSeqs(templateFile);
158 mothurOut("Done."); mothurOutEndLine();
160 int numSeqs = querySeqs.size();
162 chimeraResults.resize(numSeqs);
163 chimeraFlags.resize(numSeqs, "no");
165 //break up file if needed
166 int linesPerProcess = numSeqs / processors ;
168 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
169 //find breakup of sequences for all times we will Parallelize
170 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
173 for (int i = 0; i < (processors-1); i++) {
174 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
176 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
177 int i = processors - 1;
178 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
181 lines.push_back(new linePair(0, numSeqs));
184 if (seqMask != "") { decalc = new DeCalculator(); } //to use below
186 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
187 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
188 slayer = new Slayer(window, increment, minSim, divR);
190 for (int i = 0; i < querySeqs.size(); i++) {
192 string chimeraFlag = maligner->getResults(querySeqs[i]);
193 float percentIdentical = maligner->getPercentID();
194 vector<results> Results = maligner->getOutput();
196 //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
198 for (int j = 0; j < Results.size(); j++) {
199 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
202 if (chimeraFlag == "yes") {
204 //get sequence that were given from maligner results
205 vector<SeqDist> seqs;
206 for (int j = 0; j < Results.size(); j++) {
207 Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
209 //seq = NULL if error occurred in getSequence
210 if (seq == NULL) { break; }
214 member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
215 seqs.push_back(member);
219 //limit number of parents to explore - default 5
220 if (Results.size() > parents) {
222 sort(seqs.begin(), seqs.end(), compareSeqDist);
223 //prioritize larger more similiar sequence fragments
224 reverse(seqs.begin(), seqs.end());
226 for (int k = seqs.size()-1; k > (parents-1); k--) {
232 //put seqs into vector to send to slayer
233 vector<Sequence*> seqsForSlayer;
234 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
236 //mask then send to slayer...
238 decalc->setMask(seqMask);
241 decalc->runMask(querySeqs[i]);
244 for (int k = 0; k < seqsForSlayer.size(); k++) {
245 decalc->runMask(seqsForSlayer[k]);
251 chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
252 chimeraResults[i] = slayer->getOutput();
255 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
260 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
268 catch(exception& e) {
269 errorOut(e, "ChimeraSlayer", "getChimeras");
273 //***************************************************************************************************************
274 Sequence* ChimeraSlayer::getSequence(string name) {
278 //look through templateSeqs til you find it
280 for (int i = 0; i < templateSeqs.size(); i++) {
281 if (name == templateSeqs[i]->getName()) {
287 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
289 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
293 catch(exception& e) {
294 errorOut(e, "ChimeraSlayer", "getSequence");
298 //***************************************************************************************************************