5 * Created by westcott on 2/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerarealigner.h"
11 #include "needlemanoverlap.hpp"
14 //***************************************************************************************************************
15 ChimeraReAligner::ChimeraReAligner() { m = MothurOut::getInstance(); }
16 //***************************************************************************************************************
17 ChimeraReAligner::~ChimeraReAligner() {}
18 //***************************************************************************************************************
19 void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
23 // if (parents.size() != 0) {
24 // vector<Sequence*> queryParts;
25 // vector<Sequence*> parentParts; //queryParts[0] relates to parentParts[0]
27 // string qAligned = query->getAligned();
28 // string newQuery = "";
30 // //sort parents by region start
31 // sort(parents.begin(), parents.end(), compareRegionStart);
33 // //make sure you don't cutoff beginning of query
34 // if (parents[0].nastRegionStart > 0) { newQuery += qAligned.substr(0, parents[0].nastRegionStart); }
37 // //take query and break apart into pieces using breakpoints given by results of parents
38 // for (int i = 0; i < parents.size(); i++) {
39 // int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
40 // string q = qAligned.substr(parents[i].nastRegionStart, length);
42 // Sequence* queryFrag = new Sequence(query->getName(), q);
43 // queryParts.push_back(queryFrag);
45 // string p = parents[i].parentAligned;
46 // p = p.substr(parents[i].nastRegionStart, length);
47 // Sequence* parent = new Sequence(parents[i].parent, p);
48 // parentParts.push_back(parent);
50 // if (queryFrag->getUnaligned().length() > longest) { longest = queryFrag->getUnaligned().length(); }
51 // if (parent->getUnaligned().length() > longest) { longest = parent->getUnaligned().length(); }
54 // //align each peice to correct parent from results
55 // for (int i = 0; i < queryParts.size(); i++) {
56 // if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
58 // Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
60 // Nast nast(alignment, queryParts[i], parentParts[i]);
65 // //recombine pieces to form new query sequence
66 // for (int i = 0; i < queryParts.size(); i++) {
67 // //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.
68 // //we don't want to loose length so in this case we will leave query alone
70 // int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
71 // if (space > 0) { //they don't meet and we need to add query piece
72 // string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
77 // newQuery += queryParts[i]->getAligned();
80 // //make sure you don't cutoff end of query
81 // if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) { newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1); }
83 // query->setAligned(newQuery);
86 // for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i]; }
87 // for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i]; }
89 // } //else leave query alone, you have bigger problems...
91 if(parents.size() != 0){
93 alignmentLength = query->getAlignLength(); //x
94 int queryUnalignedLength = query->getNumBases(); //y
96 buildTemplateProfile(parents);
98 createAlignMatrix(queryUnalignedLength, alignmentLength);
99 fillAlignMatrix(query->getUnaligned());
100 query->setAligned(getNewAlignment(query->getUnaligned()));
104 catch(exception& e) {
105 m->errorOut(e, "ChimeraReAligner", "reAlign");
109 /***************************************************************************************************************/
111 void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
113 int numParents = parents.size();
115 profile.resize(alignmentLength);
117 for(int i=0;i<numParents;i++){
118 string seq = parents[i];
120 for(int j=0;j<alignmentLength;j++){
123 if(seq[j] == 'A') { profile[j].A++; }
124 else if(seq[j] == 'T') { profile[j].T++; }
125 else if(seq[j] == 'G') { profile[j].G++; }
126 else if(seq[j] == 'C') { profile[j].C++; }
127 else if(seq[j] == '-') { profile[j].Gap++; }
128 else if(seq[j] == '.') { profile[j].Gap++; }
129 // else { profile[j].A++; }
135 for(int i=0;i<alignmentLength;i++){
136 profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
140 catch(exception& e) {
141 m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
147 /***************************************************************************************************************/
149 void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
152 alignMatrix.resize(alignmentLength+1);
153 for(int i=0;i<=alignmentLength;i++){
154 alignMatrix[i].resize(queryUnalignedLength+1);
157 for(int i=1;i<=alignmentLength;i++) { alignMatrix[i][0].direction = 'l'; }
158 for(int j=1;j<=queryUnalignedLength;j++){ alignMatrix[0][j].direction = 'u'; }
160 catch(exception& e) {
161 m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
166 /***************************************************************************************************************/
168 void ChimeraReAligner::fillAlignMatrix(string query){
172 int nrows = alignMatrix.size()-1;
173 int ncols = alignMatrix[0].size()-1;
175 for(int i=1;i<=nrows;i++){
177 bases p = profile[i-1];
178 int numChars = p.Chars;
180 for(int j=1;j<=ncols;j++){
184 // score it for if there was a match
185 int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
186 int maxDirection = 'd';
188 // score it for if there was a gap in the query
189 int score = alignMatrix[i-1][j].score + (numChars * GAP);
190 if (score > maxScore) {
195 alignMatrix[i][j].score = maxScore;
196 alignMatrix[i][j].direction = maxDirection;
201 catch(exception& e) {
202 m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
207 /***************************************************************************************************************/
209 int ChimeraReAligner::calcMatchScore(bases p, char q){
217 if(q == 'G') { score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap)); }
218 else if(q == 'A') { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); }
219 else if(q == 'T') { score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap)); }
220 else if(q == 'C') { score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap)); }
221 else { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); }
225 catch(exception& e) {
226 m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
231 /***************************************************************************************************************/
233 string ChimeraReAligner::getNewAlignment(string query){
235 string queryAlignment(alignmentLength, '.');
236 string referenceAlignment(alignmentLength, '.');
239 int maxScore = -99999999;
241 int nrows = alignMatrix.size()-1;
242 int ncols = alignMatrix[0].size()-1;
247 for(int i=1;i<=nrows;i++){
248 int score = alignMatrix[i][ncols].score;
249 if (score > maxScore) {
256 for(int j=1;j<=ncols;j++){
257 int score = alignMatrix[nrows][j].score;
258 if (score > maxScore) {
265 int currentRow = bestRow;
266 int currentCol = bestCol;
268 int alignmentPosition = 0;
269 if(currentRow < alignmentLength){
270 for(int i=alignmentLength;i>currentRow;i--){
275 AlignCell c = alignMatrix[currentRow][currentCol];
276 while(c.direction != 'x'){
280 if(c.direction == 'd'){
281 q = query[currentCol-1];
287 else if (c.direction == 'u') {
290 else if(c.direction == 'l'){
292 if(currentCol == 0) { gapChar = '.'; }
293 else { gapChar = '-'; }
299 cout << "you shouldn't be here..." << endl;
302 queryAlignment[alignmentPosition] = q;
304 c = alignMatrix[currentRow][currentCol];
307 // need to reverse the string
309 for(int i=alignmentLength-1;i>=0;i--){
310 flipSeq += queryAlignment[i];
315 catch(exception& e) {
316 m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
321 /***************************************************************************************************************/
323 // Sequence* ChimeraReAligner::getSequence(string name) {
327 // //look through templateSeqs til you find it
329 // for (int i = 0; i < templateSeqs.size(); i++) {
330 // if (name == templateSeqs[i]->getName()) {
336 // if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
338 // temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
342 // catch(exception& e) {
343 // m->errorOut(e, "ChimeraReAligner", "getSequence");
348 //***************************************************************************************************************/