5 * Created by westcott on 2/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerarealigner.h"
11 #include "needlemanoverlap.hpp"
14 //***************************************************************************************************************
15 ChimeraReAligner::ChimeraReAligner() { m = MothurOut::getInstance(); }
16 //***************************************************************************************************************
17 ChimeraReAligner::~ChimeraReAligner() {}
18 //***************************************************************************************************************
19 void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
23 // if (parents.size() != 0) {
24 // vector<Sequence*> queryParts;
25 // vector<Sequence*> parentParts; //queryParts[0] relates to parentParts[0]
27 // string qAligned = query->getAligned();
28 // string newQuery = "";
30 // //sort parents by region start
31 // sort(parents.begin(), parents.end(), compareRegionStart);
33 // //make sure you don't cutoff beginning of query
34 // if (parents[0].nastRegionStart > 0) { newQuery += qAligned.substr(0, parents[0].nastRegionStart); }
37 // //take query and break apart into pieces using breakpoints given by results of parents
38 // for (int i = 0; i < parents.size(); i++) {
39 // int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
40 // string q = qAligned.substr(parents[i].nastRegionStart, length);
42 // Sequence* queryFrag = new Sequence(query->getName(), q);
43 // queryParts.push_back(queryFrag);
45 // string p = parents[i].parentAligned;
46 // p = p.substr(parents[i].nastRegionStart, length);
47 // Sequence* parent = new Sequence(parents[i].parent, p);
48 // parentParts.push_back(parent);
50 // if (queryFrag->getUnaligned().length() > longest) { longest = queryFrag->getUnaligned().length(); }
51 // if (parent->getUnaligned().length() > longest) { longest = parent->getUnaligned().length(); }
54 // //align each peice to correct parent from results
55 // for (int i = 0; i < queryParts.size(); i++) {
56 // if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
58 // Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
60 // Nast nast(alignment, queryParts[i], parentParts[i]);
65 // //recombine pieces to form new query sequence
66 // for (int i = 0; i < queryParts.size(); i++) {
67 // //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.
68 // //we don't want to loose length so in this case we will leave query alone
70 // int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
71 // if (space > 0) { //they don't meet and we need to add query piece
72 // string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
77 // newQuery += queryParts[i]->getAligned();
80 // //make sure you don't cutoff end of query
81 // if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) { newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1); }
83 // query->setAligned(newQuery);
86 // for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i]; }
87 // for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i]; }
89 // } //else leave query alone, you have bigger problems...
91 if(parents.size() != 0){
93 alignmentLength = query->getAlignLength(); //x
94 int queryUnalignedLength = query->getNumBases(); //y
97 buildTemplateProfile(parents);
99 createAlignMatrix(queryUnalignedLength, alignmentLength);
100 fillAlignMatrix(query->getUnaligned());
101 query->setAligned(getNewAlignment(query->getUnaligned()));
105 catch(exception& e) {
106 m->errorOut(e, "ChimeraReAligner", "reAlign");
110 /***************************************************************************************************************/
112 void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
114 int numParents = parents.size();
116 profile.resize(alignmentLength);
118 for(int i=0;i<numParents;i++){
119 string seq = parents[i];
121 for(int j=0;j<alignmentLength;j++){
124 if(seq[j] == 'A') { profile[j].A++; }
125 else if(seq[j] == 'T') { profile[j].T++; }
126 else if(seq[j] == 'G') { profile[j].G++; }
127 else if(seq[j] == 'C') { profile[j].C++; }
128 else if(seq[j] == '-') { profile[j].Gap++; }
129 else if(seq[j] == '.') { profile[j].Gap++; }
130 else { profile[j].A++; }
136 for(int i=0;i<alignmentLength;i++){
137 profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
141 catch(exception& e) {
142 m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
148 /***************************************************************************************************************/
150 void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
153 alignMatrix.resize(alignmentLength+1);
154 for(int i=0;i<=alignmentLength;i++){
155 alignMatrix[i].resize(queryUnalignedLength+1);
158 for(int i=1;i<=alignmentLength;i++) { alignMatrix[i][0].direction = 'l'; }
159 for(int j=1;j<=queryUnalignedLength;j++){ alignMatrix[0][j].direction = 'u'; }
161 catch(exception& e) {
162 m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
167 /***************************************************************************************************************/
169 void ChimeraReAligner::fillAlignMatrix(string query){
173 int nrows = alignMatrix.size()-1;
174 int ncols = alignMatrix[0].size()-1;
176 for(int i=1;i<=nrows;i++){
178 bases p = profile[i-1];
179 int numChars = p.Chars;
181 for(int j=1;j<=ncols;j++){
185 // score it for if there was a match
186 int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
187 int maxDirection = 'd';
189 // score it for if there was a gap in the query
190 int score = alignMatrix[i-1][j].score + (numChars * GAP);
191 if (score > maxScore) {
196 alignMatrix[i][j].score = maxScore;
197 alignMatrix[i][j].direction = maxDirection;
202 catch(exception& e) {
203 m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
208 /***************************************************************************************************************/
210 int ChimeraReAligner::calcMatchScore(bases p, char q){
218 if(q == 'G') { score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap)); }
219 else if(q == 'A') { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); }
220 else if(q == 'T') { score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap)); }
221 else if(q == 'C') { score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap)); }
222 else { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); }
226 catch(exception& e) {
227 m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
232 /***************************************************************************************************************/
234 string ChimeraReAligner::getNewAlignment(string query){
236 string queryAlignment(alignmentLength, '.');
237 string referenceAlignment(alignmentLength, '.');
240 int maxScore = -99999999;
242 int nrows = alignMatrix.size()-1;
243 int ncols = alignMatrix[0].size()-1;
248 for(int i=1;i<=nrows;i++){
249 int score = alignMatrix[i][ncols].score;
250 if (score > maxScore) {
257 for(int j=1;j<=ncols;j++){
258 int score = alignMatrix[nrows][j].score;
259 if (score > maxScore) {
266 int currentRow = bestRow;
267 int currentCol = bestCol;
269 int alignmentPosition = 0;
270 if(currentRow < alignmentLength){
271 for(int i=alignmentLength;i>currentRow;i--){
276 AlignCell c = alignMatrix[currentRow][currentCol];
277 while(c.direction != 'x'){
281 if(c.direction == 'd'){
282 q = query[currentCol-1];
288 else if (c.direction == 'u') {
291 else if(c.direction == 'l'){
293 if(currentCol == 0) { gapChar = '.'; }
294 else { gapChar = '-'; }
300 cout << "you shouldn't be here..." << endl;
303 queryAlignment[alignmentPosition] = q;
305 c = alignMatrix[currentRow][currentCol];
308 // need to reverse the string
310 for(int i=alignmentLength-1;i>=0;i--){
311 flipSeq += queryAlignment[i];
316 catch(exception& e) {
317 m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
322 /***************************************************************************************************************/
324 // Sequence* ChimeraReAligner::getSequence(string name) {
328 // //look through templateSeqs til you find it
330 // for (int i = 0; i < templateSeqs.size(); i++) {
331 // if (name == templateSeqs[i]->getName()) {
337 // if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
339 // temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
343 // catch(exception& e) {
344 // m->errorOut(e, "ChimeraReAligner", "getSequence");
349 //***************************************************************************************************************/