]> git.donarmstrong.com Git - mothur.git/blob - chimerarealigner.cpp
incorporation of nast-ier code
[mothur.git] / chimerarealigner.cpp
1 /*
2  *  chimerarealigner.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 2/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerarealigner.h"
11 #include "needlemanoverlap.hpp"
12 #include "nast.hpp"
13
14 //***************************************************************************************************************
15 ChimeraReAligner::ChimeraReAligner()  {  m = MothurOut::getInstance(); }
16 //***************************************************************************************************************
17 ChimeraReAligner::~ChimeraReAligner() {}        
18 //***************************************************************************************************************
19 void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
20
21         
22         try {
23 //              if (parents.size() != 0) {
24 //                      vector<Sequence*> queryParts;
25 //                      vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
26 //                      
27 //                      string qAligned = query->getAligned();
28 //                      string newQuery = "";
29 //              
30 //                      //sort parents by region start
31 //                      sort(parents.begin(), parents.end(), compareRegionStart);
32 //
33 //                      //make sure you don't cutoff beginning of query 
34 //                      if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart);  }
35 //                      int longest = 0;
36 //
37 //                      //take query and break apart into pieces using breakpoints given by results of parents
38 //                      for (int i = 0; i < parents.size(); i++) {
39 //                              int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
40 //                              string q = qAligned.substr(parents[i].nastRegionStart, length);
41 //      
42 //                              Sequence* queryFrag = new Sequence(query->getName(), q);
43 //                              queryParts.push_back(queryFrag);
44 //              
45 //                              string p = parents[i].parentAligned;
46 //                              p = p.substr(parents[i].nastRegionStart, length);
47 //                              Sequence* parent = new Sequence(parents[i].parent, p);
48 //                              parentParts.push_back(parent);
49 //
50 //                              if (queryFrag->getUnaligned().length() > longest)       { longest = queryFrag->getUnaligned().length(); }
51 //                              if (parent->getUnaligned().length() > longest)  { longest = parent->getUnaligned().length();    }
52 //                      }
53 //
54 //                      //align each peice to correct parent from results
55 //                      for (int i = 0; i < queryParts.size(); i++) {
56 //                              if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
57 //                              else {
58 //                                      Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
59 //                              
60 //                                      Nast nast(alignment, queryParts[i], parentParts[i]);
61 //                                      delete alignment;
62 //                              }
63 //                      }
64 //
65 //                      //recombine pieces to form new query sequence
66 //                      for (int i = 0; i < queryParts.size(); i++) {
67 //                              //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.  
68 //                              //we don't want to loose length so in this case we will leave query alone
69 //                              if (i != 0) {
70 //                                      int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
71 //                                      if (space > 0) { //they don't meet and we need to add query piece
72 //                                              string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
73 //                                              newQuery += q;
74 //                                      }
75 //                              }
76 //
77 //                              newQuery += queryParts[i]->getAligned();
78 //                      }
79 //                      
80 //                      //make sure you don't cutoff end of query 
81 //                      if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1);  }
82 //                      
83 //                      query->setAligned(newQuery);
84 //
85 //                      //free memory
86 //                      for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
87 //                      for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
88 //                      
89 //              } //else leave query alone, you have bigger problems...
90
91                 if(parents.size() != 0){
92
93                         alignmentLength = query->getAlignLength();      //x
94                         int queryUnalignedLength = query->getNumBases();        //y
95                         
96                         
97                         buildTemplateProfile(parents);
98                         
99                         createAlignMatrix(queryUnalignedLength, alignmentLength);
100                         fillAlignMatrix(query->getUnaligned());
101                         query->setAligned(getNewAlignment(query->getUnaligned()));
102                 }
103                 
104         }
105         catch(exception& e) {
106                 m->errorOut(e, "ChimeraReAligner", "reAlign");
107                 exit(1);
108         }
109 }
110 /***************************************************************************************************************/
111
112 void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
113         try{    
114                 int numParents = parents.size();
115
116                 profile.resize(alignmentLength);
117                                 
118                 for(int i=0;i<numParents;i++){
119                         string seq = parents[i];
120
121                         for(int j=0;j<alignmentLength;j++){
122                                         
123                                 
124                                 if(seq[j] == 'A')               {       profile[j].A++;         }
125                                 else if(seq[j] == 'T')  {       profile[j].T++;         }
126                                 else if(seq[j] == 'G')  {       profile[j].G++;         }
127                                 else if(seq[j] == 'C')  {       profile[j].C++;         }
128                                 else if(seq[j] == '-')  {       profile[j].Gap++;       }
129                                 else if(seq[j] == '.')  {       profile[j].Gap++;       }
130                                 else                                    {       profile[j].A++;         }
131                                 
132                         }
133                 }
134                 
135
136                 for(int i=0;i<alignmentLength;i++){
137                         profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
138                 }
139                 
140         }
141         catch(exception& e) {
142                 m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
143                 exit(1);
144         }
145 }
146         
147  
148 /***************************************************************************************************************/
149
150 void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
151         
152         try{
153                 alignMatrix.resize(alignmentLength+1);
154                 for(int i=0;i<=alignmentLength;i++){
155                         alignMatrix[i].resize(queryUnalignedLength+1);
156                 }
157
158                 for(int i=1;i<=alignmentLength;i++)             {       alignMatrix[i][0].direction = 'l';      }
159                 for(int j=1;j<=queryUnalignedLength;j++){       alignMatrix[0][j].direction = 'u';      }
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
163                 exit(1);
164         }
165 }
166
167 /***************************************************************************************************************/
168
169 void ChimeraReAligner::fillAlignMatrix(string query){
170         try{
171                 int GAP = -4;
172                 
173                 int nrows = alignMatrix.size()-1;
174                 int ncols = alignMatrix[0].size()-1;
175                                 
176                 for(int i=1;i<=nrows;i++){
177                         
178                         bases p = profile[i-1];
179                         int numChars = p.Chars;
180                         
181                         for(int j=1;j<=ncols;j++){
182                         
183                                 char q = query[j-1];
184                                 
185                                 //      score it for if there was a match
186                                 int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
187                                 int maxDirection = 'd';
188                                 
189                                 //      score it for if there was a gap in the query
190                                 int score = alignMatrix[i-1][j].score + (numChars * GAP);
191                                 if (score > maxScore) {
192                                         maxScore = score;
193                                         maxDirection = 'l';
194                                 }
195                                 
196                                 alignMatrix[i][j].score = maxScore;
197                                 alignMatrix[i][j].direction = maxDirection;
198                                 
199                         }
200                 }
201         }
202         catch(exception& e) {
203                 m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
204                 exit(1);
205         }
206 }
207
208 /***************************************************************************************************************/
209
210 int ChimeraReAligner::calcMatchScore(bases p, char q){
211         try{
212                 
213                 int MATCH = 5;
214                 int MISMATCH = -4;
215                 
216                 int score = 0;
217                 
218                 if(q == 'G')            {       score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap));           }
219                 else if(q == 'A')       {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
220                 else if(q == 'T')       {       score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap));           }
221                 else if(q == 'C')       {       score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap));           }
222                 else                            {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
223
224                 return score;
225         }
226         catch(exception& e) {
227                 m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
228                 exit(1);
229         }
230 }
231
232 /***************************************************************************************************************/
233
234 string ChimeraReAligner::getNewAlignment(string query){
235         try{
236                 string queryAlignment(alignmentLength, '.');
237                 string referenceAlignment(alignmentLength, '.');
238                 
239                 
240                 int maxScore = -99999999;
241                 
242                 int nrows = alignMatrix.size()-1;
243                 int ncols = alignMatrix[0].size()-1;
244
245                 int bestCol = -1;
246                 int bestRow = -1;
247                 
248                 for(int i=1;i<=nrows;i++){
249                         int score = alignMatrix[i][ncols].score;
250                         if (score > maxScore) {
251                                 maxScore = score;
252                                 bestRow = i;
253                                 bestCol = ncols;
254                         }
255                 }
256                 
257                 for(int j=1;j<=ncols;j++){
258                         int score = alignMatrix[nrows][j].score;
259                         if (score > maxScore) {
260                                 maxScore = score;
261                                 bestRow = nrows;
262                                 bestCol = j;
263                         }
264                 }
265                 
266                 int currentRow = bestRow;
267                 int currentCol = bestCol;
268                 
269                 int alignmentPosition = 0;
270                 if(currentRow < alignmentLength){
271                         for(int i=alignmentLength;i>currentRow;i--){
272                                 alignmentPosition++;
273                         }
274                 }
275                 
276                 AlignCell c = alignMatrix[currentRow][currentCol];
277                 while(c.direction != 'x'){
278                         
279                         char q;
280
281                         if(c.direction == 'd'){
282                                 q = query[currentCol-1];
283                                 currentCol--;
284                                 currentRow--;
285                         }
286                         
287                         
288                         else if (c.direction == 'u') {
289                                 break;
290                         }                                       
291                         else if(c.direction == 'l'){
292                                 char gapChar;
293                                 if(currentCol == 0)     {       gapChar = '.';  }
294                                 else                            {       gapChar = '-';  }
295                                                                 
296                                 q = gapChar;
297                                 currentRow--;
298                         }
299                         else{
300                                 cout << "you shouldn't be here..." << endl;
301                         }
302
303                         queryAlignment[alignmentPosition] = q;
304                         alignmentPosition++;
305                         c = alignMatrix[currentRow][currentCol];
306                 }
307
308 //              need to reverse the string
309                 string flipSeq = "";
310                 for(int i=alignmentLength-1;i>=0;i--){
311                         flipSeq += queryAlignment[i];                   
312                 }
313                 
314                 return flipSeq;
315         }
316         catch(exception& e) {
317                 m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
318                 exit(1);
319         }
320 }
321
322 /***************************************************************************************************************/
323
324 // Sequence* ChimeraReAligner::getSequence(string name) {
325 //      try{
326 //              Sequence* temp;
327 //              
328 //              //look through templateSeqs til you find it
329 //              int spot = -1;
330 //              for (int i = 0; i < templateSeqs.size(); i++) {
331 //                      if (name == templateSeqs[i]->getName()) {  
332 //                              spot = i;
333 //                              break;
334 //                      }
335 //              }
336 //              
337 //              if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
338 //              
339 //              temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
340 //              
341 //              return temp;
342 //      }
343 //      catch(exception& e) {
344 //              m->errorOut(e, "ChimeraReAligner", "getSequence");
345 //              exit(1);
346 //      }
347 //}
348
349 //***************************************************************************************************************/